1 | <?php |
||||||
2 | |||||||
3 | declare(strict_types=1); |
||||||
4 | |||||||
5 | namespace AOE\Crawler\Controller; |
||||||
6 | |||||||
7 | /*************************************************************** |
||||||
8 | * Copyright notice |
||||||
9 | * |
||||||
10 | * (c) 2020 AOE GmbH <[email protected]> |
||||||
11 | * |
||||||
12 | * All rights reserved |
||||||
13 | * |
||||||
14 | * This script is part of the TYPO3 project. The TYPO3 project is |
||||||
15 | * free software; you can redistribute it and/or modify |
||||||
16 | * it under the terms of the GNU General Public License as published by |
||||||
17 | * the Free Software Foundation; either version 3 of the License, or |
||||||
18 | * (at your option) any later version. |
||||||
19 | * |
||||||
20 | * The GNU General Public License can be found at |
||||||
21 | * http://www.gnu.org/copyleft/gpl.html. |
||||||
22 | * |
||||||
23 | * This script is distributed in the hope that it will be useful, |
||||||
24 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
25 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
26 | * GNU General Public License for more details. |
||||||
27 | * |
||||||
28 | * This copyright notice MUST APPEAR in all copies of the script! |
||||||
29 | ***************************************************************/ |
||||||
30 | |||||||
31 | use AOE\Crawler\Configuration\ExtensionConfigurationProvider; |
||||||
32 | use AOE\Crawler\Converter\JsonCompatibilityConverter; |
||||||
33 | use AOE\Crawler\Crawler; |
||||||
34 | use AOE\Crawler\CrawlStrategy\CrawlStrategyFactory; |
||||||
35 | use AOE\Crawler\Domain\Model\Process; |
||||||
36 | use AOE\Crawler\Domain\Repository\ConfigurationRepository; |
||||||
37 | use AOE\Crawler\Domain\Repository\ProcessRepository; |
||||||
38 | use AOE\Crawler\Domain\Repository\QueueRepository; |
||||||
39 | use AOE\Crawler\QueueExecutor; |
||||||
40 | use AOE\Crawler\Service\ConfigurationService; |
||||||
41 | use AOE\Crawler\Service\PageService; |
||||||
42 | use AOE\Crawler\Service\UrlService; |
||||||
43 | use AOE\Crawler\Utility\SignalSlotUtility; |
||||||
44 | use AOE\Crawler\Value\QueueFilter; |
||||||
45 | use PDO; |
||||||
46 | use Psr\Http\Message\UriInterface; |
||||||
47 | use Psr\Log\LoggerAwareInterface; |
||||||
48 | use Psr\Log\LoggerAwareTrait; |
||||||
49 | use TYPO3\CMS\Backend\Tree\View\PageTreeView; |
||||||
50 | use TYPO3\CMS\Backend\Utility\BackendUtility; |
||||||
51 | use TYPO3\CMS\Core\Authentication\BackendUserAuthentication; |
||||||
52 | use TYPO3\CMS\Core\Compatibility\PublicMethodDeprecationTrait; |
||||||
53 | use TYPO3\CMS\Core\Compatibility\PublicPropertyDeprecationTrait; |
||||||
54 | use TYPO3\CMS\Core\Core\Bootstrap; |
||||||
55 | use TYPO3\CMS\Core\Core\Environment; |
||||||
56 | use TYPO3\CMS\Core\Database\Connection; |
||||||
57 | use TYPO3\CMS\Core\Database\ConnectionPool; |
||||||
58 | use TYPO3\CMS\Core\Database\Query\QueryBuilder; |
||||||
59 | use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction; |
||||||
60 | use TYPO3\CMS\Core\Database\QueryGenerator; |
||||||
61 | use TYPO3\CMS\Core\Domain\Repository\PageRepository; |
||||||
62 | use TYPO3\CMS\Core\Exception\SiteNotFoundException; |
||||||
63 | use TYPO3\CMS\Core\Imaging\Icon; |
||||||
64 | use TYPO3\CMS\Core\Imaging\IconFactory; |
||||||
65 | use TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException; |
||||||
66 | use TYPO3\CMS\Core\Site\Entity\Site; |
||||||
67 | use TYPO3\CMS\Core\Type\Bitmask\Permission; |
||||||
68 | use TYPO3\CMS\Core\Utility\DebugUtility; |
||||||
69 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||||
70 | use TYPO3\CMS\Core\Utility\MathUtility; |
||||||
71 | use TYPO3\CMS\Extbase\Object\ObjectManager; |
||||||
72 | |||||||
73 | /** |
||||||
74 | * Class CrawlerController |
||||||
75 | * |
||||||
76 | * @package AOE\Crawler\Controller |
||||||
77 | */ |
||||||
78 | class CrawlerController implements LoggerAwareInterface |
||||||
79 | { |
||||||
80 | use LoggerAwareTrait; |
||||||
81 | use PublicMethodDeprecationTrait; |
||||||
82 | use PublicPropertyDeprecationTrait; |
||||||
83 | |||||||
/**
 * CLI status value 0.
 *
 * @deprecated since 9.2.5 will be removed in v11.x
 */
public const CLI_STATUS_NOTHING_PROCCESSED = 0;

/**
 * CLI status: the queue is not empty.
 *
 * @deprecated since 9.2.5 will be removed in v11.x
 */
public const CLI_STATUS_REMAIN = 1;

/**
 * CLI status: (some) queue items were processed.
 *
 * @deprecated since 9.2.5 will be removed in v11.x
 */
public const CLI_STATUS_PROCESSED = 2;

/**
 * CLI status: the instance didn't finish.
 *
 * @deprecated since 9.2.5 will be removed in v11.x
 */
public const CLI_STATUS_ABORTED = 4;

/**
 * CLI status value 8.
 * NOTE(review): the values 0/1/2/4/8 look like combinable bit flags - confirm against the callers.
 *
 * @deprecated since 9.2.5 will be removed in v11.x
 */
public const CLI_STATUS_POLLABLE_PROCESSED = 8;
||||||
111 | |||||||
/**
 * @var int
 */
public $setID = 0;

/**
 * @var string
 */
public $processID = '';

/**
 * @var array
 */
public $duplicateTrack = [];

/**
 * @var array
 */
public $downloadUrls = [];

/**
 * @var array
 */
public $incomingProcInstructions = [];

/**
 * @var array
 */
public $incomingConfigurationSelection = [];

/**
 * @var bool
 */
public $registerQueueEntriesInternallyOnly = false;

/**
 * @var array
 */
public $queueEntries = [];

/**
 * Plain-text "[date] url" entries; urlListFromUrlArray() appends one per URL it has not seen before.
 *
 * @var array
 */
public $urlList = [];

/**
 * Extension settings; filled by the constructor and replaceable through setExtensionSettings().
 *
 * @var array
 */
public $extensionSettings = [];
||||||
161 | |||||||
/**
 * Mount Point
 *
 * Defaults to false (no mount point). When set, the code in this class treats it as a string:
 * getPageTSconfigForId() splits it on "-" and uses the second part as page id, and
 * getUrlsForPageId() only forwards it when it is a string.
 *
 * @var bool|string
 */
public $MP = false;
||||||
169 | |||||||
/**
 * Path of the marker file whose existence signals the "disabled" state
 * (see setDisabled() / getDisabled()); initialised by the constructor.
 *
 * @var string
 * @deprecated
 */
protected $processFilename;

/**
 * Holds the internal access mode, can be 'gui', 'cli' or 'cli_im'
 *
 * @var string
 * @deprecated
 */
protected $accessMode;

/**
 * @var QueueRepository
 */
protected $queueRepository;

/**
 * @var ProcessRepository
 */
protected $processRepository;

/**
 * @var ConfigurationRepository
 */
protected $configurationRepository;

/**
 * @var QueueExecutor
 */
protected $queueExecutor;

/**
 * Overwritten by the constructor with the clamped "maxCompileUrls" extension setting.
 *
 * @var int
 */
protected $maximumUrlsToCompile = 10000;

/**
 * @var IconFactory
 */
protected $iconFactory;
||||||
213 | |||||||
/**
 * Deprecation messages keyed by method name, for the PublicMethodDeprecationTrait used by this class.
 *
 * @var string[]
 */
private $deprecatedPublicMethods = [
    'compileUrls' => 'Using CrawlerController->compileUrls() is deprecated since 9.2.5, and will be removed in v11.x',
    'cleanUpOldQueueEntries' => 'Using CrawlerController::cleanUpOldQueueEntries() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->cleanUpOldQueueEntries() instead.',
    'CLI_buildProcessId' => 'Using CrawlerController->CLI_buildProcessId() is deprecated since 9.2.5 and will be removed in v11.x',
    'CLI_checkAndAcquireNewProcess' => 'Using CrawlerController->CLI_checkAndAcquireNewProcess() is deprecated since 9.2.5 and will be removed in v11.x',
    'CLI_debug' => 'Using CrawlerController->CLI_debug() is deprecated since 9.1.3 and will be removed in v11.x',
    'CLI_releaseProcesses' => 'Using CrawlerController->CLI_releaseProcesses() is deprecated since 9.2.2 and will be removed in v11.x',
    'CLI_run' => 'Using CrawlerController->CLI_run() is deprecated since 9.2.2 and will be removed in v11.x',
    'CLI_runHooks' => 'Using CrawlerController->CLI_runHooks() is deprecated since 9.1.5 and will be removed in v11.x',
    'expandExcludeString' => 'Using CrawlerController->expandExcludeString() is deprecated since 9.2.5 and will be removed in v11.x',
    'getAccessMode' => 'Using CrawlerController->getAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
    'getLogEntriesForPageId' => 'Using CrawlerController->getLogEntriesForPageId() is deprecated since 9.1.5 and will be removed in v11.x',
    'getLogEntriesForSetId' => 'Using crawlerController::getLogEntriesForSetId() is deprecated since 9.0.1 and will be removed in v11.x',
    // This message previously named getLogEntriesForPageId() by mistake.
    'hasGroupAccess' => 'Using CrawlerController->hasGroupAccess() is deprecated since 9.2.2 and will be removed in v11.x, please use UserService::hasGroupAccess() instead.',
    'flushQueue' => 'Using CrawlerController::flushQueue() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->flushQueue() instead.',
    'setAccessMode' => 'Using CrawlerController->setAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
    'getDisabled' => 'Using CrawlerController->getDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->isDisabled() instead',
    'setDisabled' => 'Using CrawlerController->setDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->setDisabled() instead',
    'getProcessFilename' => 'Using CrawlerController->getProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
    'setProcessFilename' => 'Using CrawlerController->setProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
    'getDuplicateRowsIfExist' => 'Using CrawlerController->getDuplicateRowsIfExist() is deprecated since 9.1.4 and will be removed in v11.x, please use QueueRepository->getDuplicateQueueItemsIfExists() instead',
    'checkIfPageShouldBeSkipped' => 'Using CrawlerController->checkIfPageShouldBeSkipped() is deprecated since 9.2.5 and will be removed in v11.x',
    'swapIfFirstIsLargerThanSecond' => 'Using CrawlerController->swapIfFirstIsLargerThanSecond() is deprecated since 9.2.5, and will be removed in v11.x',
    'expandParameters' => 'Using CrawlerController->expandParameters() is deprecated since 9.2.5, and will be removed in v11.x',
];
||||||
242 | |||||||
/**
 * Deprecation messages keyed by property name, for the PublicPropertyDeprecationTrait used by this class.
 *
 * @var string[]
 */
private $deprecatedPublicProperties = [
    'accessMode' => 'Using CrawlerController->accessMode is deprecated since 9.1.3 and will be removed in v11.x',
    // This message previously named accessMode by mistake.
    'processFilename' => 'Using CrawlerController->processFilename is deprecated since 9.1.3 and will be removed in v11.x',
];
||||||
250 | |||||||
/**
 * @var BackendUserAuthentication|null
 */
private $backendUser;

/**
 * @var int
 */
private $scheduledTime = 0;

/**
 * @var int
 */
private $reqMinute = 0;

/**
 * @var bool
 */
private $submitCrawlUrls = false;

/**
 * @var bool
 */
private $downloadCrawlUrls = false;

/**
 * @var PageRepository
 */
private $pageRepository;

/**
 * @var Crawler
 */
private $crawler;

/**
 * @var ConfigurationService
 */
private $configurationService;

/**
 * @var UrlService
 */
private $urlService;
295 | |||||||
296 | /************************************ |
||||||
297 | * |
||||||
298 | * Getting URLs based on Page TSconfig |
||||||
299 | * |
||||||
300 | ************************************/ |
||||||
301 | |||||||
/**
 * Wires up the repositories and services used by the controller and loads the extension settings.
 *
 * The settings are normalised after loading:
 * - "countInARun" falls back to 100 when it is missing or not a positive integer,
 * - "processLimit" is forced into the range 1..99 (default 1),
 * - "maxCompileUrls" is forced into the range 1..1000000000 (default 10000).
 */
public function __construct()
{
    $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
    $crawlStrategyFactory = GeneralUtility::makeInstance(CrawlStrategyFactory::class);
    $this->queueRepository = $objectManager->get(QueueRepository::class);
    $this->processRepository = $objectManager->get(ProcessRepository::class);
    $this->configurationRepository = $objectManager->get(ConfigurationRepository::class);
    $this->pageRepository = GeneralUtility::makeInstance(PageRepository::class);
    $this->queueExecutor = GeneralUtility::makeInstance(QueueExecutor::class, $crawlStrategyFactory);
    $this->iconFactory = GeneralUtility::makeInstance(IconFactory::class);
    $this->crawler = GeneralUtility::makeInstance(Crawler::class);
    $this->configurationService = GeneralUtility::makeInstance(ConfigurationService::class);
    $this->urlService = GeneralUtility::makeInstance(UrlService::class);

    $this->processFilename = Environment::getVarPath() . '/lock/tx_crawler.proc';

    /** @var ExtensionConfigurationProvider $configurationProvider */
    $configurationProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
    $settings = $configurationProvider->getExtensionConfiguration();
    $this->extensionSettings = is_array($settings) ? $settings : [];

    // The settings may be empty (see the fallback above), so every key is read with a default
    // instead of triggering an "undefined index" notice/warning.
    if (MathUtility::convertToPositiveInteger($this->extensionSettings['countInARun'] ?? 0) === 0) {
        $this->extensionSettings['countInARun'] = 100;
    }

    $this->extensionSettings['processLimit'] = MathUtility::forceIntegerInRange(
        $this->extensionSettings['processLimit'] ?? 0,
        1,
        99,
        1
    );
    $this->setMaximumUrlsToCompile(
        MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'] ?? 0, 1, 1000000000, 10000)
    );
}
|||||
330 | |||||||
/**
 * Stores the upper bound for the number of URLs to compile.
 */
public function setMaximumUrlsToCompile(int $maximumUrlsToCompile): void
{
    $this->maximumUrlsToCompile = $maximumUrlsToCompile;
}
|||||
335 | |||||||
/**
 * Returns the access mode, which can be 'gui', 'cli' or 'cli_im'.
 *
 * @return string|null null until setAccessMode() has been called (the property has no default)
 * @deprecated
 */
public function getAccessMode()
{
    return $this->accessMode;
}
||||||
346 | |||||||
/**
 * Stores the access mode.
 *
 * @param string $accessMode
 * @deprecated
 */
public function setAccessMode($accessMode): void
{
    $this->accessMode = $accessMode;
}
|||||
355 | |||||||
/**
 * Toggles the disabled status that prevents processes from being processed.
 *
 * A truthy value writes the marker file at $this->processFilename; any other value
 * (false or null) removes that file if it exists.
 *
 * @deprecated
 */
public function setDisabled(?bool $disabled = true): void
{
    if ($disabled) {
        GeneralUtility::writeFile($this->processFilename, 'disabled');
        return;
    }

    if (is_file($this->processFilename)) {
        unlink($this->processFilename);
    }
}
|||||
368 | |||||||
/**
 * Tells whether the disabled marker file ($this->processFilename) currently exists.
 *
 * @deprecated
 */
public function getDisabled(): bool
{
    $markerFile = $this->processFilename;

    return is_file($markerFile);
}
||||||
377 | |||||||
/**
 * Overrides the path of the marker file used by setDisabled() / getDisabled().
 *
 * @param string $filenameWithPath
 * @deprecated
 */
public function setProcessFilename($filenameWithPath): void
{
    $this->processFilename = $filenameWithPath;
}
|||||
386 | |||||||
/**
 * Returns the path of the marker file used by setDisabled() / getDisabled().
 *
 * @return string
 * @deprecated
 */
public function getProcessFilename()
{
    return $this->processFilename;
}
||||||
395 | |||||||
/**
 * Replaces the extension settings (unserialized pendant of $TYPO3_CONF_VARS['EXT']['extConf']['crawler']).
 *
 * The array is stored as given; the normalisation done by the constructor is not repeated here.
 */
public function setExtensionSettings(array $extensionSettings): void
{
    $this->extensionSettings = $extensionSettings;
}
|||||
403 | |||||||
/**
 * Check if the given page should be crawled; delegates to PageService::checkIfPageShouldBeSkipped().
 *
 * @return false|string false if the page should be crawled (not excluded), true / skipMessage if it should be skipped
 * @deprecated
 */
public function checkIfPageShouldBeSkipped(array $pageRow)
{
    return GeneralUtility::makeInstance(PageService::class)->checkIfPageShouldBeSkipped($pageRow);
}
||||||
415 | |||||||
/**
 * Wrapper method for getUrlsForPageId()
 * It returns an array of configurations and no urls!
 *
 * An empty array is returned, with the reason written to $skipMessage, when the uid is not an
 * integer or when the page service reports that the page has to be skipped.
 *
 * @param array $pageRow Page record with at least dok-type and uid columns.
 * @param string $skipMessage Receives the skip reason, or '' when the page is processed
 * @return array
 * @see getUrlsForPageId()
 */
public function getUrlsForPageRow(array $pageRow, &$skipMessage = '')
{
    if (! is_int($pageRow['uid'])) {
        $skipMessage = 'PageUid ' . $pageRow['uid'] . ' was not an integer';
        return [];
    }

    $skipReason = $this->getPageService()->checkIfPageShouldBeSkipped($pageRow);
    if ($skipReason !== false) {
        $skipMessage = $skipReason;
        return [];
    }

    $configurations = $this->getUrlsForPageId($pageRow['uid']);
    $skipMessage = '';

    return $configurations;
}
||||||
443 | |||||||
/**
 * Creates a list of URLs from input array (and submits them to queue if asked for)
 * See Web > Info module script + "indexed_search"'s crawler hook-client using this!
 *
 * URLs whose key (url, user groups, processing instruction filter) is already present in
 * $duplicateTrack are only listed (greyed out) and not submitted again.
 *
 * @param array $vv Information about URLs from pageRow to crawl.
 * @param array $pageRow Page row
 * @param int $scheduledTime Unix time to schedule indexing to, typically time()
 * @param int $reqMinute Number of requests per minute (creates the interleave between requests); a value <= 0 disables the interleave
 * @param bool $submitCrawlUrls If set, submits the URLs to queue
 * @param bool $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
 * @param array $duplicateTrack Array which is passed by reference and contains an id per url to ensure we will not crawl duplicates
 * @param array $downloadUrls Array which will be filled with URLS for download if flag is set.
 * @param array $incomingProcInstructions Array of processing instructions
 * @return string List of URLs (meant for display in backend module)
 */
public function urlListFromUrlArray(
    array $vv,
    array $pageRow,
    $scheduledTime,
    $reqMinute,
    $submitCrawlUrls,
    $downloadCrawlUrls,
    array &$duplicateTrack,
    array &$downloadUrls,
    array $incomingProcInstructions
) {
    if (! is_array($vv['URLs'] ?? null)) {
        return 'ERROR - no URL generated';
    }
    $urlLog = [];
    $pageId = (int) $pageRow['uid'];
    $configurationHash = $this->getConfigurationHash($vv);
    $skipInnerCheck = $this->queueRepository->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageId, $configurationHash);

    // Optional configuration values are read with a default to avoid "undefined index" notices.
    $procInstrFilter = $vv['subCfg']['procInstrFilter'] ?? '';
    $userGroups = $vv['subCfg']['userGroups'] ?? '';

    // The filter does not depend on the individual URL, so it is evaluated once:
    // when it rejects the configuration, no URL is listed at all.
    if (! $this->drawURLs_PIfilter($procInstrFilter, $incomingProcInstructions)) {
        return '';
    }

    // Seconds between two consecutive requests. A non-positive rate would divide by zero
    // (DivisionByZeroError on PHP 8), so it simply disables the interleave.
    $secondsPerRequest = $reqMinute > 0 ? 60 / $reqMinute : 0;

    $urlService = new UrlService();

    foreach ($vv['URLs'] as $urlQuery) {
        $url = (string) $urlService->getUrlFromPageAndQueryParameters(
            $pageId,
            $urlQuery,
            $vv['subCfg']['baseUrl'] ?? null,
            $vv['subCfg']['force_ssl'] ?? 0
        );

        // Create key by which to determine unique-ness:
        $uKey = $url . '|' . $userGroups . '|' . $procInstrFilter;

        if (isset($duplicateTrack[$uKey])) {
            // If the url key is registered just display it and do not resubmit it
            $urlLog[] = '<em><span class="text-muted">' . htmlspecialchars($url) . '</span></em>';
        } else {
            // Scheduled time, rounded down to the full minute:
            $schTime = $scheduledTime + round(count($duplicateTrack) * $secondsPerRequest);
            $schTime = intval($schTime / 60) * 60;
            $formattedDate = BackendUtility::datetime($schTime);
            $this->urlList[] = '[' . $formattedDate . '] ' . $url;
            $urlList = '[' . $formattedDate . '] ' . htmlspecialchars($url);

            // Submit for crawling!
            if ($submitCrawlUrls) {
                // NOTE(review): the un-interleaved $scheduledTime is submitted here while the list
                // shows $schTime - confirm whether $schTime was intended.
                $added = $this->addUrl(
                    $pageId,
                    $url,
                    $vv['subCfg'],
                    $scheduledTime,
                    $configurationHash,
                    $skipInnerCheck
                );
                if ($added === false) {
                    $urlList .= ' (URL already existed)';
                }
            } elseif ($downloadCrawlUrls) {
                $downloadUrls[$url] = $url;
            }
            $urlLog[] = $urlList;
        }
        $duplicateTrack[$uKey] = true;
    }

    return implode('<br>', $urlLog);
}
||||||
528 | |||||||
/**
 * Tells whether a processing instruction list passes the incoming filter.
 *
 * The filter passes when no instructions were requested at all, or when at least one
 * requested instruction is an item of the comma-separated list $piString.
 *
 * @param string $piString PI to test
 * @param array $incomingProcInstructions Processing instructions
 * @return boolean
 */
public function drawURLs_PIfilter($piString, array $incomingProcInstructions)
{
    // Nothing requested means nothing to filter on: accept.
    $accepted = empty($incomingProcInstructions);

    foreach ($incomingProcInstructions as $requestedInstruction) {
        if (GeneralUtility::inList($piString, $requestedInstruction)) {
            $accepted = true;
            break;
        }
    }

    return $accepted;
}
||||||
549 | |||||||
/**
 * Returns the page TSconfig for the given page, or for the mount point page when $this->MP is set.
 *
 * With a non-empty string in $this->MP, the part after the first "-" is used as the page id to
 * read the TSconfig from. Any other value of $this->MP (false, or a non-string) means the
 * TSconfig of $id is used.
 *
 * Functions registered in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId']
 * are called afterwards and may alter the configuration through the 'pageTSConfig' reference.
 *
 * @param int $id Page id
 * @return array Page TSconfig
 */
public function getPageTSconfigForId(int $id): array
{
    if (! $this->MP || ! is_string($this->MP)) {
        // Non-string values cannot be split (explode() would raise a TypeError under strict_types).
        $pageTSconfig = BackendUtility::getPagesTSconfig($id);
    } else {
        $mountPointParts = explode('-', $this->MP);
        // A missing second part yields page id 0 instead of an "undefined offset" notice.
        $pageTSconfig = BackendUtility::getPagesTSconfig((int) ($mountPointParts[1] ?? 0));
    }

    // Call a hook to alter configuration
    $hooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId'] ?? null;
    if (is_array($hooks)) {
        $params = [
            'pageId' => $id,
            'pageTSConfig' => &$pageTSconfig,
        ];
        foreach ($hooks as $userFunc) {
            GeneralUtility::callUserFunction($userFunc, $params, $this);
        }
    }

    return $pageTSconfig;
}
||||||
572 | |||||||
/**
 * Collects the crawler configurations that apply to a page.
 * Returns configurations only - no urls are added!
 *
 * The configurations come from page TSconfig first and from the configuration service's
 * database lookup second; functions registered in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] may then change the result
 * through the 'res' reference.
 */
public function getUrlsForPageId(int $pageId): array
{
    $tsConfig = $this->getPageTSconfigForId($pageId);
    $mountPointParameter = is_string($this->MP) ? $this->MP : '';

    $configurations = [];
    // Configurations from page TSconfig
    $configurations = $this->configurationService->getConfigurationFromPageTS(
        $tsConfig,
        $pageId,
        $configurations,
        $mountPointParameter
    );
    // Configurations from the database lookup
    $configurations = $this->configurationService->getConfigurationFromDatabase($pageId, $configurations);

    $hooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] ?? [];
    foreach ($hooks as $hook) {
        $hookParameters = [
            'res' => &$configurations,
        ];
        GeneralUtility::callUserFunction($hook, $hookParameters, $this);
    }

    return $configurations;
}
||||||
600 | |||||||
/**
 * Find all configurations of subpages of a page
 * TODO: Write Functional Tests
 *
 * The result lists the names of the array-valued "paramSets." entries from the page TSconfig of
 * $rootid (trailing dot removed), followed by the names of the configuration records the
 * repository returns for the rootline pages plus the pages of the tree below $rootid.
 *
 * @param int $rootid Page id the branch starts at
 * @param int $depth Depth handed to the page tree
 * @return array Configuration names
 */
public function getConfigurationsForBranch(int $rootid, int $depth): array
{
    $names = [];

    $tsConfig = $this->getPageTSconfigForId($rootid);
    $paramSets = $tsConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
    foreach ($paramSets as $setKey => $setConfiguration) {
        if (is_array($setConfiguration)) {
            $names[] = substr($setKey, -1) === '.' ? substr($setKey, 0, -1) : $setKey;
        }
    }

    // Page ids of the rootline ...
    $pageIds = [];
    foreach (BackendUtility::BEgetRootLine($rootid) as $rootLinePage) {
        $pageIds[] = $rootLinePage['uid'];
    }

    // ... plus the page ids of the tree below the root page, limited to pages the backend user may see.
    /** @var PageTreeView $pageTree */
    $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
    $permissionClause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
    if (empty($permissionClause)) {
        $pageTree->init('');
    } else {
        $pageTree->init('AND ' . $permissionClause);
    }
    $pageTree->getTree($rootid, $depth, '');
    foreach ($pageTree->tree as $treeNode) {
        $pageIds[] = $treeNode['row']['uid'];
    }

    $records = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($rootid, $pageIds);
    foreach ($records as $record) {
        $names[] = $record['name'];
    }

    return $names;
}
||||||
637 | |||||||
/**
 * Check if a user has access to an item
 * (e.g. get the group list of the current logged in user from $GLOBALS['TSFE']->gr_list)
 *
 * @param string $groupList Comma-separated list of (fe_)group UIDs from a user
 * @param string $accessList Comma-separated list of (fe_)group UIDs of the item to access
 * @return bool TRUE if at least one of the users group UIDs is in the access list or the access list is empty
 * @see \TYPO3\CMS\Frontend\Page\PageRepository::getMultipleGroupsWhereClause()
 * @deprecated
 * @codeCoverageIgnore
 */
public function hasGroupAccess($groupList, $accessList)
{
    // An empty access list does not restrict anything.
    if (empty($accessList)) {
        return true;
    }

    $userGroupUids = GeneralUtility::intExplode(',', $groupList);
    foreach ($userGroupUids as $userGroupUid) {
        if (GeneralUtility::inList($accessList, $userGroupUid)) {
            return true;
        }
    }

    return false;
}
||||||
661 | |||||||
/**
 * Expands the parameter configuration into the individual values of each parameter.
 *
 * Syntax of a parameter value:
 * - If the (trimmed) value is wrapped in [...] it is expanded as described below, otherwise it is taken literally.
 * - The content of the brackets is split by "|"; each part is processed on its own and the results are added together.
 * - For each part:
 *   - "[int]-[int]" = Integer range, expanded to all values in between (both ends included), from low to high,
 *     limited to 1000 values per range. Example "1-34" or "-40--30"
 *   - "_TABLE:[TCA table name];[further options]" = Look up of table records, filtering out deleted records.
 *     Options are separated by ";" and written as "KEY:value". Example "_TABLE:tt_content; _PID:123"
 *       _PID         page id to look records up on (default: $pid)
 *       _RECURSIVE   depth for collecting sub pages of _PID (default: 0 = only _PID itself)
 *       _PIDFIELD    name of the field holding the page id (default: "pid")
 *       _FIELD       field whose values are returned; must be "uid" or a TCA column of the table (default: "uid")
 *       _WHERE       additional raw SQL condition
 *       _ADDTABLE    additional raw SQL "from" part
 *       _ENABLELANG  if set, only records whose transOrigPointerField is <= 0 are picked
 *                    (original records without their language overlays)
 *   - Default: Literal value
 * - After each part, the hook functions registered in
 *   $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] are called.
 *
 * @param array $paramArray Array with key (GET var name) and values (value of GET var which is configuration for expansion)
 * @param integer $pid Current page ID
 * @return array Same keys as the input, each holding an array of expanded values
 * @deprecated
 * @codeCoverageIgnore
 */
public function expandParameters($paramArray, $pid)
{
    // Traverse parameter names:
    foreach ($paramArray as $p => $v) {
        $v = trim($v);

        // Only values encapsulated in square brackets are expanded; everything else is a literal.
        if (strpos($v, '[') !== 0 || substr($v, -1) !== ']') {
            $paramArray[$p] = [$v];
            continue;
        }

        // Strip the brackets and reset the paramArray value as an array.
        // The cast guards against substr() returning false for "[]" on PHP 7.
        $v = (string) substr($v, 1, -1);
        $paramArray[$p] = [];

        // Explode parts and traverse them:
        foreach (explode('|', $v) as $pV) {
            $trimmedPart = trim($pV);

            // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
            if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', $trimmedPart, $reg)) {
                $reg = $this->swapIfFirstIsLargerThanSecond($reg);

                // Traverse range, add values; the brake limits the size of the range.
                $runAwayBrake = 1000;
                for ($a = $reg[1]; $a <= $reg[2]; $a++) {
                    $paramArray[$p][] = $a;
                    $runAwayBrake--;
                    if ($runAwayBrake <= 0) {
                        break;
                    }
                }
            } elseif (strpos($trimmedPart, '_TABLE:') === 0) {
                // Parse "KEY:value" options; an option without ":" is stored with a null value.
                $subpartParams = [];
                foreach (GeneralUtility::trimExplode(';', $pV) as $spV) {
                    $keyAndValue = GeneralUtility::trimExplode(':', $spV);
                    $subpartParams[$keyAndValue[0]] = $keyAndValue[1] ?? null;
                }

                $table = $subpartParams['_TABLE'] ?? '';

                // Table exists:
                if (isset($GLOBALS['TCA'][$table])) {
                    $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : (int) $pid;
                    $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
                    $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
                    // NOTE(review): _WHERE and _ADDTABLE are passed to the query as raw SQL fragments.
                    // This is only acceptable as long as the configuration comes from a trusted source.
                    $where = $subpartParams['_WHERE'] ?? '';
                    $addTable = $subpartParams['_ADDTABLE'] ?? '';

                    $fieldName = empty($subpartParams['_FIELD']) ? 'uid' : $subpartParams['_FIELD'];
                    if ($fieldName === 'uid' || ! empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
                        $queryBuilder = $this->getQueryBuilder($table);

                        if ($recursiveDepth > 0) {
                            /** @var QueryGenerator $queryGenerator */
                            $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
                            $pidList = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);
                            $pidArray = GeneralUtility::intExplode(',', $pidList);
                        } else {
                            $pidArray = [(string) $lookUpPid];
                        }

                        // Only the "deleted" restriction is applied to the lookup.
                        $queryBuilder->getRestrictions()
                            ->removeAll()
                            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

                        $queryBuilder
                            ->select($fieldName)
                            ->from($table)
                            ->where(
                                $queryBuilder->expr()->in($pidField, $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)),
                                $where
                            );

                        if (! empty($addTable)) {
                            // TODO: Check if this works as intended!
                            $queryBuilder->add('from', $addTable);
                        }

                        $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';
                        if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
                            $queryBuilder->andWhere(
                                $queryBuilder->expr()->lte(
                                    $transOrigPointerField,
                                    0
                                )
                            );
                        }

                        $statement = $queryBuilder->execute();

                        // Index by the selected field so every value is collected only once.
                        $rows = [];
                        while ($row = $statement->fetch()) {
                            $rows[$row[$fieldName]] = $row;
                        }

                        $paramArray[$p] = array_merge($paramArray[$p], array_keys($rows));
                    }
                }
            } else {
                // Just add value:
                $paramArray[$p][] = $pV;
            }

            // Hook for processing own expandParameters place holder
            $expandHooks = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
            if (is_array($expandHooks)) {
                $_params = [
                    'pObj' => &$this,
                    'paramArray' => &$paramArray,
                    'currentKey' => $p,
                    'currentValue' => $pV,
                    'pid' => $pid,
                ];
                foreach ($expandHooks as $_funcRef) {
                    GeneralUtility::callUserFunction($_funcRef, $_params, $this);
                }
            }
        }

        // Make unique set of values and sort array by key:
        $paramArray[$p] = array_unique($paramArray[$p]);
        ksort($paramArray);
    }

    return $paramArray;
}
||||||
809 | |||||||
/**
 * Compiles URLs from a parameter array (output of expandParameters()).
 * The number of URLs will be the multiplication of the number of parameter values for each key.
 *
 * The work is delegated to the URL service, together with the value of getMaximumUrlsToCompile().
 *
 * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
 * @param array $urls URLs accumulated in this array (for recursion)
 * @deprecated
 * @codeCoverageIgnore
 */
public function compileUrls(array $paramArray, array $urls): array
{
    $maximumUrls = $this->getMaximumUrlsToCompile();

    return $this->urlService->compileUrls($paramArray, $urls, $maximumUrls);
}
||||||
823 | |||||||
824 | /************************************ |
||||||
825 | * |
||||||
826 | * Crawler log |
||||||
827 | * |
||||||
828 | ************************************/ |
||||||
829 | |||||||
/**
 * Returns the records from the crawler queue for the given page ID, newest "scheduled" first.
 *
 * @param integer $id Page ID for which to look up log entries.
 * @param QueueFilter $queueFilter Restricts the result: "pending" => exec_time = 0, "finished" => exec_time > 0,
 *                                 anything else => no restriction
 * @param boolean $doFlush If TRUE, the queue repository's flushQueue() is called with $queueFilter
 *                         before the entries are read
 * @param boolean $doFullFlush Not used by this method; kept for backwards compatibility
 * @param integer $itemsPerPage Limit the amount of entries per page default is 10; values <= 0 disable the limit
 * @return array
 *
 * @deprecated
 */
public function getLogEntriesForPageId($id, QueueFilter $queueFilter, $doFlush = false, $doFullFlush = false, $itemsPerPage = 10)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
    $queryBuilder
        ->select('*')
        ->from(QueueRepository::TABLE_NAME)
        ->where(
            $queryBuilder->expr()->eq('page_id', $queryBuilder->createNamedParameter($id, PDO::PARAM_INT))
        )
        ->orderBy('scheduled', 'DESC');

    // NOTE(review): the filter object is compared loosely against plain strings here, which
    // presumably relies on QueueFilter being convertible to string - confirm against QueueFilter.
    switch ($queueFilter) {
        case 'pending':
            $queryBuilder->andWhere($queryBuilder->expr()->eq('exec_time', 0));
            break;
        case 'finished':
            $queryBuilder->andWhere($queryBuilder->expr()->gt('exec_time', 0));
            break;
    }

    if ($doFlush) {
        $this->queueRepository->flushQueue($queueFilter);
    }

    if ($itemsPerPage > 0) {
        $queryBuilder->setMaxResults((int) $itemsPerPage);
    }

    return $queryBuilder->execute()->fetchAll();
}
||||||
878 | |||||||
/**
 * Returns the records from the crawler queue for the given set ID, newest "scheduled" first,
 * or flushes queue entries instead of reading them.
 *
 * @param int $set_id Set ID for which to look up log entries.
 * @param string $filter Filter: "pending" => all that is not yet run, "finished" => all complete ones, anything else => all entries
 * @param bool $doFlush If TRUE, flushQueue() is called instead of selecting entries and an empty array is returned
 * @param bool $doFullFlush Only relevant together with $doFlush: if TRUE, flushQueue() is called with an empty
 *                          condition instead of the set/filter condition
 * @param int $itemsPerPage Limit the amount of entries per page default is 10; values <= 0 disable the limit
 * @return array
 *
 * @deprecated
 */
public function getLogEntriesForSetId(int $set_id, string $filter = '', bool $doFlush = false, bool $doFullFlush = false, int $itemsPerPage = 10)
{
    $selectQuery = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
    $selectQuery
        ->select('*')
        ->from(QueueRepository::TABLE_NAME)
        ->where(
            $selectQuery->expr()->eq('set_id', $selectQuery->createNamedParameter($set_id, PDO::PARAM_INT))
        )
        ->orderBy('scheduled', 'DESC');

    // The flush condition is collected in one AND-composite, so that the parts added
    // below end up joined with AND; each add() hands back the condition for flushQueue().
    $expressions = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getConnectionForTable(QueueRepository::TABLE_NAME)
        ->getExpressionBuilder();
    $flushConstraints = $expressions->andX();
    $flushWhere = '';

    if ($filter === 'pending') {
        $selectQuery->andWhere($selectQuery->expr()->eq('exec_time', 0));
        $flushWhere = $flushConstraints->add($expressions->eq('exec_time', 0));
    } elseif ($filter === 'finished') {
        $selectQuery->andWhere($selectQuery->expr()->gt('exec_time', 0));
        $flushWhere = $flushConstraints->add($expressions->gt('exec_time', 0));
    }

    if ($doFlush) {
        $flushWhere = $flushConstraints->add($expressions->eq('set_id', (int) $set_id));
        $this->flushQueue($doFullFlush ? '' : $flushWhere);

        return [];
    }

    if ($itemsPerPage > 0) {
        $selectQuery->setMaxResults((int) $itemsPerPage);
    }

    return $selectQuery->execute()->fetchAll();
}
||||||
931 | |||||||
/**
 * Adds a call back entry to the queue table (typically called from hooks, see indexed search class "class.crawler.php").
 *
 * The call back reference is stored in the JSON encoded parameters under the key "_CALLBACKOBJ".
 *
 * @param integer $setId Set ID
 * @param array $params Parameters to pass to call back function; anything that is not an array is replaced by an empty array
 * @param string $callBack Call back object reference, eg. 'EXT:indexed_search/class.crawler.php:&tx_indexedsearch_crawler'
 * @param integer $page_id Page ID to attach it to
 * @param integer $schedule Time at which to activate; 0 (or empty) means the current time
 */
public function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0): void
{
    $callbackParameters = is_array($params) ? $params : [];
    $callbackParameters['_CALLBACKOBJ'] = $callBack;

    $queueConnection = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getConnectionForTable(QueueRepository::TABLE_NAME);

    // Fall back to "now" when no schedule time was given.
    $scheduledAt = (int) $schedule;
    if (! $scheduledAt) {
        $scheduledAt = $this->getCurrentTime();
    }

    $queueRow = [
        'page_id' => (int) $page_id,
        'parameters' => json_encode($callbackParameters),
        'scheduled' => $scheduledAt,
        'exec_time' => 0,
        'set_id' => (int) $setId,
        'result_data' => '',
    ];

    $queueConnection->insert(QueueRepository::TABLE_NAME, $queueRow);
}
||||||
961 | |||||||
962 | /************************************ |
||||||
963 | * |
||||||
964 | * URL setting |
||||||
965 | * |
||||||
966 | ************************************/ |
||||||
967 | |||||||
/**
 * Sets a URL for crawling.
 *
 * Builds the queue record for the URL and either registers it internally only
 * ($this->registerQueueEntriesInternallyOnly) or inserts it into the queue table,
 * unless an equal entry is found by the duplication check. A signal is emitted both
 * when the URL was added and when a duplicate was found.
 *
 * @param integer $id Page ID
 * @param string $url Complete URL
 * @param array $subCfg Sub configuration array (from TS config); the keys "userGroups", "procInstrFilter",
 *                      "procInstrParams." and "key" are read and may be missing
 * @param integer $tstamp Scheduled-time
 * @param string $configurationHash (optional) configuration hash
 * @param bool $skipInnerDuplicationCheck (optional) skip inner duplication check
 * @return bool TRUE only if a new record was inserted into the queue table
 */
public function addUrl(
    $id,
    $url,
    array $subCfg,
    $tstamp,
    $configurationHash = '',
    $skipInnerDuplicationCheck = false
) {
    $urlAdded = false;
    $rows = [];

    // Creating parameters:
    $parameters = [
        'url' => $url,
    ];

    // fe user group simulation:
    $uGs = implode(',', array_unique(GeneralUtility::intExplode(',', (string) ($subCfg['userGroups'] ?? ''), true)));
    if ($uGs) {
        $parameters['feUserGroupList'] = $uGs;
    }

    // Setting processing instructions
    $parameters['procInstructions'] = GeneralUtility::trimExplode(',', (string) ($subCfg['procInstrFilter'] ?? ''));
    if (is_array($subCfg['procInstrParams.'] ?? null)) {
        $parameters['procInstrParams'] = $subCfg['procInstrParams.'];
    }

    // Compile value array:
    $parameters_serialized = json_encode($parameters);
    $fieldArray = [
        'page_id' => (int) $id,
        'parameters' => $parameters_serialized,
        'parameters_hash' => GeneralUtility::shortMD5($parameters_serialized),
        'configuration_hash' => $configurationHash,
        'scheduled' => $tstamp,
        'exec_time' => 0,
        'set_id' => (int) $this->setID,
        'result_data' => '',
        'configuration' => $subCfg['key'] ?? null,
    ];

    if ($this->registerQueueEntriesInternallyOnly) {
        // the entries will only be registered and not stored to the database
        $this->queueEntries[] = $fieldArray;

        return $urlAdded;
    }

    if (! $skipInnerDuplicationCheck) {
        // check if there is already an equal entry
        $rows = $this->queueRepository->getDuplicateQueueItemsIfExists(
            (bool) ($this->extensionSettings['enableTimeslot'] ?? false),
            $tstamp,
            $this->getCurrentTime(),
            $fieldArray['page_id'],
            $fieldArray['parameters_hash']
        );
    }

    if (empty($rows)) {
        $connectionForCrawlerQueue = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME);
        $connectionForCrawlerQueue->insert(
            QueueRepository::TABLE_NAME,
            $fieldArray
        );
        $uid = $connectionForCrawlerQueue->lastInsertId(QueueRepository::TABLE_NAME, 'qid');
        $rows[] = $uid;
        $urlAdded = true;

        $signalPayload = ['uid' => $uid, 'fieldArray' => $fieldArray];
        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_URL_ADDED_TO_QUEUE,
            $signalPayload
        );
    } else {
        $signalPayload = ['rows' => $rows, 'fieldArray' => $fieldArray];
        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_DUPLICATE_URL_IN_QUEUE,
            $signalPayload
        );
    }

    return $urlAdded;
}
||||||
1064 | |||||||
/**
 * Returns the current system time as reported by time().
 *
 * @return int Current Unix timestamp
 */
public function getCurrentTime()
{
    $now = time();

    return $now;
}
||||||
1074 | |||||||
1075 | /************************************ |
||||||
1076 | * |
||||||
1077 | * URL reading |
||||||
1078 | * |
||||||
1079 | ************************************/ |
||||||
1080 | |||||||
/**
 * Reads the URL for a single queue entry.
 *
 * The queue record is loaded, locked by setting its exec_time, executed through the
 * queue executor, and finally updated with the JSON encoded result. Signals are emitted
 * before processing and before the result is written.
 *
 * @param integer $queueId
 * @param boolean $force If set, will process even if exec_time has been set!
 * @param string $processId If not empty, stored as "process_id_completed" of the queue entry
 *
 * @return int|null NULL if no matching queue record was found; otherwise a bit mask that has
 *                  self::CLI_STATUS_POLLABLE_PROCESSED set when a registered "pollSuccess"
 *                  instruction reported success, else 0
 */
public function readUrl($queueId, $force = false, string $processId = '')
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
    $ret = 0;
    $this->logger->debug('crawler-readurl start ' . microtime(true));

    $queryBuilder
        ->select('*')
        ->from(QueueRepository::TABLE_NAME)
        ->where(
            $queryBuilder->expr()->eq('qid', $queryBuilder->createNamedParameter($queueId, PDO::PARAM_INT))
        );
    if (! $force) {
        // Without force, only entries that are not executed yet and are assigned to a process are read.
        $queryBuilder
            ->andWhere('exec_time = 0')
            ->andWhere('process_scheduled > 0');
    }
    $queueRec = $queryBuilder->execute()->fetch();

    if (! is_array($queueRec)) {
        return null;
    }

    SignalSlotUtility::emitSignal(
        self::class,
        SignalSlotUtility::SIGNAL_QUEUEITEM_PREPROCESS,
        [$queueId, &$queueRec]
    );

    // Set exec_time to lock record:
    $field_array = ['exec_time' => $this->getCurrentTime()];

    if (! empty($processId)) {
        // if multiprocessing is used we need to store the id of the process which has handled this entry
        $field_array['process_id_completed'] = $processId;
    }

    GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME)
        ->update(
            QueueRepository::TABLE_NAME,
            $field_array,
            ['qid' => (int) $queueId]
        );

    $result = $this->queueExecutor->executeQueueItem($queueRec, $this);

    // A result without content carries nothing that could be polled for success.
    $content = $result['content'] ?? null;
    if ($content !== null) {
        /** @var JsonCompatibilityConverter $jsonCompatibilityConverter */
        $jsonCompatibilityConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
        $resultData = $jsonCompatibilityConverter->convert($content);

        // atm there's no need to point to specific pollable extensions
        $pollables = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pollSuccess'] ?? null;
        if (is_array($resultData) && is_array($pollables)) {
            $procInstructions = $resultData['parameters']['procInstructions'] ?? null;
            foreach ($pollables as $pollable) {
                // only check the success value if the instruction is running
                // it is important to name the pollSuccess key same as the procInstructions key
                if (is_array($procInstructions)
                    && in_array($pollable, $procInstructions, true)
                    && ! empty($resultData['success'][$pollable])
                ) {
                    $ret |= self::CLI_STATUS_POLLABLE_PROCESSED;
                }
            }
        }
    }

    // Set result in log which also denotes the end of the processing of this entry.
    $field_array = ['result_data' => json_encode($result)];

    SignalSlotUtility::emitSignal(
        self::class,
        SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
        [$queueId, &$field_array]
    );

    GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME)
        ->update(
            QueueRepository::TABLE_NAME,
            $field_array,
            ['qid' => (int) $queueId]
        );

    $this->logger->debug('crawler-readurl stop ' . microtime(true));
    return $ret;
}
1178 | |||||||
/**
 * Reads the URL for a log entry that is not inserted yet.
 *
 * The entry is inserted into the queue table with exec_time set to the current time,
 * executed through the queue executor and then updated with the JSON encoded result.
 *
 * @param array $field_array Queue field array
 *
 * @return array|bool|mixed|string The result as returned by the queue executor
 */
public function readUrlFromArray($field_array)
{
    $queueTable = QueueRepository::TABLE_NAME;

    // Set exec_time to lock record:
    $field_array['exec_time'] = $this->getCurrentTime();

    $queueConnection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable($queueTable);
    $queueConnection->insert($queueTable, $field_array);

    // The executor receives the record including the id it just got in the database.
    $queueId = $queueConnection->lastInsertId($queueTable, 'qid');
    $field_array['qid'] = $queueId;
    $result = $this->queueExecutor->executeQueueItem($field_array, $this);

    // Set result in log which also denotes the end of the processing of this entry.
    $resultUpdate = ['result_data' => json_encode($result)];

    SignalSlotUtility::emitSignal(
        self::class,
        SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
        [$queueId, &$resultUpdate]
    );

    $queueConnection->update(
        $queueTable,
        $resultUpdate,
        ['qid' => $queueId]
    );

    return $result;
}
1215 | |||||||
1216 | /***************************** |
||||||
1217 | * |
||||||
1218 | * Compiling URLs to crawl - tools |
||||||
1219 | * |
||||||
1220 | *****************************/ |
||||||
1221 | |||||||
/**
 * Builds the page tree below a root page and renders the crawler URL rows for every page in it.
 *
 * The crawl settings passed in are stored on the controller, where
 * drawURLs_addRowsForPage() reads them while rendering each page. The duplicate
 * tracking list and the download URL list are reset on every call.
 *
 * @param int $id Root page id to start from.
 * @param int $depth Depth of tree, 0=only id-page, 1= on sublevel, 99 = infinite
 * @param int $scheduledTime Unix Time when the URL is timed to be visited when put in queue
 * @param int $reqMinute Number of requests per minute (creates the interleave between requests)
 * @param bool $submitCrawlUrls If set, submits the URLs to queue in database (real crawling)
 * @param bool $downloadCrawlUrls If set (and submitCrawlUrls is false) will fill $downloadUrls with entries
 * @param array $incomingProcInstructions Array of processing instructions
 * @param array $configurationSelection Array of configuration keys
 * @return string Concatenated HTML rows produced by drawURLs_addRowsForPage()
 */
public function getPageTreeAndUrls(
    $id,
    $depth,
    $scheduledTime,
    $reqMinute,
    $submitCrawlUrls,
    $downloadCrawlUrls,
    array $incomingProcInstructions,
    array $configurationSelection
) {
    // Start every run with empty bookkeeping lists
    $this->duplicateTrack = [];
    $this->downloadUrls = [];

    // Remember the crawl settings for the row rendering further down
    $this->incomingConfigurationSelection = $configurationSelection;
    $this->incomingProcInstructions = $incomingProcInstructions;
    $this->downloadCrawlUrls = $downloadCrawlUrls;
    $this->submitCrawlUrls = $submitCrawlUrls;
    $this->reqMinute = $reqMinute;
    $this->scheduledTime = $scheduledTime;

    // Page tree restricted to the pages the backend user is allowed to see
    /* @var PageTreeView $pageTree */
    $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
    $permissionClause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
    $pageTree->init('AND ' . $permissionClause);

    // The root page is only added when it is readable for the backend user
    $rootPage = BackendUtility::readPageAccess($id, $permissionClause);
    if (is_array($rootPage)) {
        $pageTree->tree[] = [
            'row' => $rootPage,
            'HTML' => $this->iconFactory->getIconForRecord('pages', $rootPage, Icon::SIZE_SMALL),
        ];
    }

    // Branch beneath the root page
    if ($depth) {
        $pageTree->getTree($id, $depth, '');
    }

    $renderedRows = '';

    foreach ($pageTree->tree as $node) {
        $this->MP = false;

        // Mount points additionally get the rows of the mounted branch
        if ($node['row']['doktype'] === PageRepository::DOKTYPE_MOUNTPOINT) {
            // NOTE(review): the result is accessed as $mountPointPage[0][...], i.e. as a list of rows.
            // Confirm that pageRepository->getPage() really returns that shape here.
            $mountPointPage = $this->pageRepository->getPage($node['row']['uid']);

            // MP parameter: "<mounted page id>-<mount point page id>"
            $this->MP = $mountPointPage[0]['mount_pid'] . '-' . $node['row']['uid'];

            $mountedTree = GeneralUtility::makeInstance(PageTreeView::class);
            $mountedTree->init('AND ' . $permissionClause);
            $mountedTree->getTree($mountPointPage[0]['mount_pid'], $depth);

            foreach ($mountedTree->tree as $mountedNode) {
                $mountedTitle = $mountedNode['HTML'] . BackendUtility::getRecordTitle('pages', $mountedNode['row'], true);
                $renderedRows .= $this->drawURLs_addRowsForPage($mountedNode['row'], $mountedTitle);
            }

            if (! $mountPointPage[0]['mount_pid_ol']) {
                // if the mount_pid_ol is not set the MP must not be used for the mountpoint page
                $this->MP = false;
            } else {
                // replace page when mount_pid_ol is enabled
                $node['row']['uid'] = $mountPointPage[0]['mount_pid'];
            }
        }

        $nodeTitle = $node['HTML'] . BackendUtility::getRecordTitle('pages', $node['row'], true);
        $renderedRows .= $this->drawURLs_addRowsForPage($node['row'], $nodeTitle);
    }

    return $renderedRows;
}
||||||
1314 | |||||||
/**
 * Expands an exclude string by delegating to the configuration service.
 *
 * @param string $excludeString Exclude string
 * @return array Result of ConfigurationService::expandExcludeString() for the given string
 * @deprecated
 */
public function expandExcludeString($excludeString)
{
    $expandedExcludes = $this->configurationService->expandExcludeString($excludeString);

    return $expandedExcludes;
}
||||||
1326 | |||||||
/**
 * Create the rows for display of the page tree
 * For each page a number of rows are shown displaying GET variable configuration
 *
 * One table row is rendered per crawler configuration that applies to the page.
 * The first row carries the page title cell, spanning all rows of the page. Pages
 * without any configuration get a single "No entries" row, followed by the skip
 * message if getUrlsForPageRow() supplied one.
 *
 * Optional configuration keys (subCfg.userGroups, subCfg.procInstrFilter,
 * subCfg.procInstrParams., paramExpanded, paramParsed) may be absent; they are
 * then treated as empty instead of raising "undefined index" warnings.
 *
 * @param array $pageRow Page record; its "uid" is checked against the exclude list of each configuration
 * @param string $pageTitle Ready-made HTML for the title cell (inserted unescaped)
 * @return string HTML table rows for the page
 */
public function drawURLs_addRowsForPage(array $pageRow, string $pageTitle): string
{
    $skipMessage = '';

    // Get list of configurations, reduced to the ones selected for this run
    $configurations = $this->getUrlsForPageRow($pageRow, $skipMessage);
    $configurations = ConfigurationService::removeDisallowedConfigurations($this->incomingConfigurationSelection, $configurations);

    if (empty($configurations)) {
        $message = ! empty($skipMessage) ? ' (' . $skipMessage . ')' : '';

        // Compile row:
        return '
            <tr>
                <td>' . $pageTitle . '</td>
                <td colspan="6"><em>No entries</em>' . $message . '</td>
            </tr>';
    }

    // Loop invariant: number of rows the title cell has to span
    $configurationCount = count($configurations);

    // Traverse parameter combinations:
    $c = 0;
    $content = '';
    foreach ($configurations as $confKey => $confArray) {
        // Title column, only rendered on the first row of the page:
        $titleClm = $c === 0
            ? '<td rowspan="' . $configurationCount . '">' . $pageTitle . '</td>'
            : '';

        // Numeric configuration keys arrive as int; htmlspecialchars() needs a string under strict_types
        $escapedConfKey = htmlspecialchars((string) $confKey);

        $subCfg = $confArray['subCfg'] ?? [];
        $excludedPageIds = $this->configurationService->expandExcludeString($subCfg['exclude'] ?? '');

        if (in_array($pageRow['uid'], $excludedPageIds, true)) {
            $content .= '<tr>
                ' . $titleClm . '
                <td>' . $escapedConfKey . '</td>
                <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td>
            </tr>';
            $c++;
            continue;
        }

        // URL list:
        $urlList = $this->urlListFromUrlArray(
            $confArray,
            $pageRow,
            $this->scheduledTime,
            $this->reqMinute,
            $this->submitCrawlUrls,
            $this->downloadCrawlUrls,
            $this->duplicateTrack,
            $this->downloadUrls,
            // if empty the urls won't be filtered by processing instructions
            $this->incomingProcInstructions
        );

        // Expanded parameters:
        $paramExpanded = '';
        $calcAccu = [];
        $calcRes = 1;
        foreach ($confArray['paramExpanded'] ?? [] as $gVar => $gVal) {
            $valueCount = count($gVal);
            $paramExpanded .= '
                <tr>
                    <td>' . htmlspecialchars('&' . $gVar . '=') . '<br/>' .
                        '(' . $valueCount . ')' .
                        '</td>
                    <td nowrap="nowrap">' . nl2br(htmlspecialchars(implode(chr(10), $gVal))) . '</td>
                </tr>
            ';
            $calcRes *= $valueCount;
            $calcAccu[] = $valueCount;
        }
        $paramExpanded = '<table>' . $paramExpanded . '</table>';
        $paramExpanded .= 'Comb: ' . implode('*', $calcAccu) . '=' . $calcRes;

        // Options
        $optionValues = '';
        if (! empty($subCfg['userGroups'])) {
            $optionValues .= 'User Groups: ' . $subCfg['userGroups'] . '<br/>';
        }
        if (! empty($subCfg['procInstrFilter'])) {
            $optionValues .= 'ProcInstr: ' . $subCfg['procInstrFilter'] . '<br/>';
        }

        // Parsed parameters, one "&name=value" pair per line
        $parsedParameters = GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'] ?? []);
        $parsedParameters = rawurldecode(trim(str_replace('&', chr(10) . '&', $parsedParameters)));

        // Compile row:
        $content .= '
            <tr>
                ' . $titleClm . '
                <td>' . $escapedConfKey . '</td>
                <td>' . nl2br(htmlspecialchars($parsedParameters)) . '</td>
                <td>' . $paramExpanded . '</td>
                <td nowrap="nowrap">' . $urlList . '</td>
                <td nowrap="nowrap">' . $optionValues . '</td>
                <td nowrap="nowrap">' . DebugUtility::viewArray($subCfg['procInstrParams.'] ?? null) . '</td>
            </tr>';

        $c++;
    }

    return $content;
}
||||||
1430 | |||||||
1431 | /***************************** |
||||||
1432 | * |
||||||
1433 | * CLI functions |
||||||
1434 | * |
||||||
1435 | *****************************/ |
||||||
1436 | |||||||
/**
 * Running the functionality of the CLI (crawling URLs from queue)
 *
 * Runs the registered cli_hooks, cleans up the queue, fetches up to $countInARun
 * queue entries, assigns them to the current process id and reads them one by one.
 * The return value is a bit mask built from the readUrl() results and the
 * CLI_STATUS_* constants.
 *
 * @param int $countInARun Passed to QueueRepository::fetchRecordsToBeCrawled()
 * @param int $sleepTime Pause after each processed entry, passed to usleep()
 * @param int $sleepAfterFinish Pause after the batch, passed to sleep()
 * @return int Status bit mask; 0 when there was nothing to crawl
 * @deprecated
 * @codeCoverageIgnore
 */
public function CLI_run(int $countInARun, int $sleepTime, int $sleepAfterFinish): int
{
    // First, run hooks:
    $hookClassNames = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];
    foreach ($hookClassNames as $hookClassName) {
        trigger_error(
            'This hook (crawler/cli_hooks) is deprecated since 9.1.5 and will be removed when dropping support for TYPO3 9LTS and 10LTS',
            E_USER_DEPRECATED
        );
        $hook = GeneralUtility::makeInstance($hookClassName);
        if (is_object($hook)) {
            $hook->crawler_init($this);
        }
    }

    // Clean up the queue
    $this->queueRepository->cleanupQueue();

    // Select entries:
    $queueRows = $this->queueRepository->fetchRecordsToBeCrawled($countInARun);

    $status = 0;
    if (empty($queueRows)) {
        // Nothing to crawl: no status bit is set
        return $status;
    }

    $queueIds = [];
    foreach ($queueRows as $queueRow) {
        $queueIds[] = $queueRow['qid'];
    }

    $processId = $this->CLI_buildProcessId();

    // save the number of assigned queue entries to determine how many have been processed later
    $assignedCount = $this->queueRepository->updateProcessIdAndSchedulerForQueueIds($queueIds, $processId);
    $this->processRepository->updateProcessAssignItemsCount($assignedCount, $processId);

    // Abort when not every selected entry could be assigned to this process
    if ($assignedCount !== count($queueIds)) {
        return $status | self::CLI_STATUS_ABORTED;
    }

    $processedCount = 0;
    foreach ($queueRows as $queueRow) {
        $status |= $this->readUrl($queueRow['qid']);
        $processedCount++;

        // Just to relax the system
        usleep((int) $sleepTime);

        // if during the start and the current read url the cli has been disable we need to return from the function
        // mark the process NOT as ended.
        if ($this->crawler->isDisabled()) {
            return $status | self::CLI_STATUS_ABORTED;
        }

        if ($this->processRepository->isProcessActive($this->CLI_buildProcessId())) {
            continue;
        }

        // possible timeout
        $this->CLI_debug('conflict / timeout (' . $this->CLI_buildProcessId() . ')');
        $status |= self::CLI_STATUS_ABORTED;
        break;
    }

    sleep((int) $sleepAfterFinish);

    if ($processedCount > 0) {
        $status |= self::CLI_STATUS_PROCESSED;
    }

    return $status;
}
||||||
1512 | |||||||
/**
 * Activate hooks
 *
 * Instantiates every class registered under
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] and calls its
 * crawler_init() method with this controller.
 *
 * @deprecated
 * @codeCoverageIgnore
 */
public function CLI_runHooks(): void
{
    $hookClassNames = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];

    foreach ($hookClassNames as $hookClassName) {
        $hook = GeneralUtility::makeInstance($hookClassName);
        if (! is_object($hook)) {
            continue;
        }

        $hook->crawler_init($this);
    }
}
||||||
1527 | |||||||
/**
 * Try to acquire a new process with the given id
 * also performs some auto-cleanup for orphan processes
 *
 * Active processes whose TTL lies before the current time count as orphans: they
 * are marked as not active and their queue entries are released. A new process is
 * only registered while the number of remaining active processes is below the
 * "processLimit" extension setting.
 *
 * @param string $id identification string for the process
 * @return bool true when the process was registered, false when the limit is reached
 *              or the system process id could not be determined
 * @todo preemption might not be the most elegant way to clean up
 * @deprecated
 * @codeCoverageIgnore
 */
public function CLI_checkAndAcquireNewProcess($id)
{
    $systemProcessId = getmypid();
    if (! $systemProcessId) {
        return false;
    }

    $aliveProcessCount = 0;
    $orphanProcessIds = [];

    $activeProcesses = $this->processRepository->findAllActive();
    $now = $this->getCurrentTime();

    /** @var Process $activeProcess */
    foreach ($activeProcesses as $activeProcess) {
        if ($activeProcess->getTtl() < $now) {
            $orphanProcessIds[] = $activeProcess->getProcessId();
            continue;
        }

        $aliveProcessCount++;
    }

    // if there are less than allowed active processes then add a new one
    $acquired = $aliveProcessCount < (int) $this->extensionSettings['processLimit'];
    if ($acquired) {
        $this->processRepository->addProcess($id, $systemProcessId);
    }

    // The cleanup runs regardless of whether a new process was acquired
    $this->processRepository->deleteProcessesMarkedAsDeleted();
    $this->processRepository->markRequestedProcessesAsNotActive($orphanProcessIds);
    $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($orphanProcessIds);

    return $acquired;
}
||||||
1574 | |||||||
/**
 * Release a process and the required resources
 *
 * Besides releasing the requested processes, every call also frees the queue
 * entries that belong to inactive processes and resets the system process id of
 * inactive processes that still own queue entries with exec_time = 0.
 *
 * @param mixed $releaseIds string with a single process-id or array with multiple process-ids
 * @return bool false when an empty array was passed (nothing to release), true otherwise
 * @deprecated
 * @codeCoverageIgnore
 */
public function CLI_releaseProcesses($releaseIds)
{
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(QueueRepository::TABLE_NAME);

    // A single process id is handled like a list with one entry
    $processIds = is_array($releaseIds) ? $releaseIds : [$releaseIds];

    if (empty($processIds)) {
        // nothing to release
        return false;
    }

    // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup
    // this ensures that a single process can't mess up the entire process table

    // mark all processes as deleted which have no "waiting" queue-entries and which are not active

    // ReleaseQueueEntries
    $queryBuilder->update(QueueRepository::TABLE_NAME, 'q');
    $queryBuilder->where(
        'q.process_id IN(SELECT p.process_id FROM tx_crawler_process as p WHERE p.active = 0)'
    );
    $queryBuilder->set('q.process_scheduled', 0);
    $queryBuilder->set('q.process_id', '');
    $queryBuilder->execute();

    // The builder is reused for the second statement, so the SET part of the first one is dropped.
    // FIXME: Not entirely sure that this is equivalent to the previous version
    $queryBuilder->resetQueryPart('set');

    // ReleaseProcessEntries
    $queryBuilder->update(ProcessRepository::TABLE_NAME);
    $queryBuilder->where(
        $queryBuilder->expr()->eq('active', 0),
        'process_id IN(SELECT q.process_id FROM tx_crawler_queue as q WHERE q.exec_time = 0)'
    );
    $queryBuilder->set('system_process_id', 0);
    $queryBuilder->execute();

    $this->processRepository->markRequestedProcessesAsNotActive($processIds);
    $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($processIds);

    return true;
}
||||||
1629 | |||||||
/**
 * Create a unique Id for the current process
 *
 * The id is derived from the current microtime on the first call and stored in
 * $this->processID; later calls return the stored value.
 *
 * @return string the ID
 * @deprecated
 * @codeCoverageIgnore
 */
public function CLI_buildProcessId()
{
    if ($this->processID) {
        return $this->processID;
    }

    $this->processID = GeneralUtility::shortMD5(microtime(true));

    return $this->processID;
}
||||||
1644 | |||||||
/**
 * Prints a message to the stdout (only if debug-mode is enabled)
 *
 * Debug mode is the "processDebug" extension setting, evaluated as an integer.
 *
 * @param string $msg the message
 * @deprecated
 * @codeCoverageIgnore
 */
public function CLI_debug($msg): void
{
    $debugEnabled = (int) $this->extensionSettings['processDebug'] !== 0;
    if (! $debugEnabled) {
        return;
    }

    echo $msg . "\n";
    flush();
}
||||||
1659 | |||||||
/**
 * Cleans up entries that stayed for too long in the queue. These are:
 * - processed entries (exec_time set) older than "cleanUpProcessedAge" days
 * - entries scheduled at least "cleanUpScheduledAge" days ago
 *
 * Both ages are extension settings given in days.
 *
 * @deprecated
 */
public function cleanUpOldQueueEntries(): void
{
    // 24*60*60 Seconds in 24 hours
    $secondsPerDay = 86400;
    $processedAgeInSeconds = $this->extensionSettings['cleanUpProcessedAge'] * $secondsPerDay;
    $scheduledAgeInSeconds = $this->extensionSettings['cleanUpScheduledAge'] * $secondsPerDay;

    $now = time();
    $processedBefore = $now - $processedAgeInSeconds;
    $scheduledBefore = $now - $scheduledAgeInSeconds;

    $this->flushQueue(
        '(exec_time<>0 AND exec_time<' . $processedBefore . ') OR scheduled<=' . $scheduledBefore
    );
}
|||||
1677 | |||||||
/**
 * Removes queue entries
 *
 * Before the entries are deleted, one SIGNAL_QUEUE_ENTRY_FLUSH signal is emitted
 * per distinct set_id among them; its payload holds the qid/set_id pairs of that set.
 *
 * @param string $where SQL related filter for the entries which should be removed;
 *                      inserted into the statements as-is, an empty value matches every entry
 *
 * @deprecated
 */
protected function flushQueue($where = ''): void
{
    $condition = strlen((string) $where) > 0 ? $where : '1=1';

    // The same builder instance is reused for all statements below
    $queryBuilder = $this->getQueryBuilder(QueueRepository::TABLE_NAME);

    $queryBuilder->selectLiteral('DISTINCT set_id');
    $queryBuilder->from(QueueRepository::TABLE_NAME);
    $queryBuilder->where($condition);
    $affectedSets = $queryBuilder->execute()->fetchAll();

    if (is_array($affectedSets)) {
        foreach ($affectedSets as $affectedSet) {
            $queryBuilder->select('qid', 'set_id');
            $queryBuilder->from(QueueRepository::TABLE_NAME);
            $queryBuilder->where(
                $condition,
                $queryBuilder->expr()->eq('set_id', $affectedSet['set_id'])
            );
            $entriesOfSet = $queryBuilder->execute()->fetchAll();

            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH,
                ['subSet' => $entriesOfSet]
            );
        }
    }

    $queryBuilder->delete(QueueRepository::TABLE_NAME);
    $queryBuilder->where($condition);
    $queryBuilder->execute();
}
|||||
1723 | |||||||
/**
 * This method determines duplicates for a queue entry with the same parameters and this timestamp.
 * If the timestamp is in the past, it will check if there is any unprocessed queue entry in the past.
 * If the timestamp is in the future it will check, if the queued entry has exactly the same timestamp
 *
 * Only entries without exec_time and without process_id are considered, and they
 * have to match the page_id and parameters_hash of $fieldArray.
 *
 * @param int $tstamp Scheduled time of the entry to check
 * @param array $fieldArray Queue record; "page_id" and "parameters_hash" are read
 *
 * @return array List of qid values of the duplicates found
 * @deprecated
 */
protected function getDuplicateRowsIfExist($tstamp, $fieldArray)
{
    $now = $this->getCurrentTime();

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
    $queryBuilder->select('qid');
    $queryBuilder->from(QueueRepository::TABLE_NAME);

    if ($tstamp > $now) {
        // entry with a timestamp in the future need to have the same schedule time
        $queryBuilder->where(
            $queryBuilder->expr()->eq('scheduled', $tstamp)
        );
    } elseif ($tstamp <= $now) {
        // this entry is scheduled with "now"
        if ($this->extensionSettings['enableTimeslot']) {
            // With time slots enabled, entries scheduled within 100 seconds around now are matched as well
            $slotStart = $now - 100;
            $slotEnd = $now + 100;
            $queryBuilder->where('scheduled BETWEEN ' . $slotStart . ' AND ' . $slotEnd);
            $queryBuilder->orWhere(
                $queryBuilder->expr()->lte('scheduled', $now)
            );
        } else {
            $queryBuilder->where(
                $queryBuilder->expr()->lte('scheduled', $now)
            );
        }
    }

    $pageIdParameter = $queryBuilder->createNamedParameter($fieldArray['page_id'], PDO::PARAM_INT);
    $parametersHashParameter = $queryBuilder->createNamedParameter($fieldArray['parameters_hash'], PDO::PARAM_STR);

    $queryBuilder->andWhere('NOT exec_time');
    $queryBuilder->andWhere('NOT process_id');
    $queryBuilder->andWhere($queryBuilder->expr()->eq('page_id', $pageIdParameter));
    $queryBuilder->andWhere($queryBuilder->expr()->eq('parameters_hash', $parametersHashParameter));

    $statement = $queryBuilder->execute();

    $duplicateQueueIds = [];
    while ($duplicate = $statement->fetch()) {
        $duplicateQueueIds[] = $duplicate['qid'];
    }

    return $duplicateQueueIds;
}
||||||
1785 | |||||||
/**
 * Returns a md5 hash generated from a serialized configuration array.
 *
 * The "paramExpanded" and "URLs" entries are left out of the hash.
 *
 * @param array $configuration Configuration to hash; the caller's array is not modified
 * @return string
 */
protected function getConfigurationHash(array $configuration)
{
    unset($configuration['paramExpanded'], $configuration['URLs']);

    $serializedConfiguration = serialize($configuration);

    return md5($serializedConfiguration);
}
||||||
1797 | |||||||
/**
 * Build a URL from a Page and the Query String. If the page has a Site configuration, it can be built by using
 * the Site instance.
 *
 * Pure delegate: all arguments are handed to a new UrlService instance unchanged.
 *
 * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl
 * @throws SiteNotFoundException
 * @throws InvalidRouteArgumentsException
 *
 * @deprecated Using CrawlerController::getUrlFromPageAndQueryParameters() is deprecated since 9.1.1 and will be removed in v11.x, please use UrlService->getUrlFromPageAndQueryParameters() instead.
 * @codeCoverageIgnore
 */
protected function getUrlFromPageAndQueryParameters(int $pageId, string $queryString, ?string $alternativeBaseUrl, int $httpsOrHttp): UriInterface
{
    return (new UrlService())->getUrlFromPageAndQueryParameters(
        $pageId,
        $queryString,
        $alternativeBaseUrl,
        $httpsOrHttp
    );
}
||||||
1814 | |||||||
/**
 * Makes sure that $reg[1] is not larger than $reg[2] by swapping the two values if necessary.
 *
 * @param array $reg Array whose entries at index 1 and 2 are compared; other entries are kept as they are
 * @return array The array with the entries at index 1 and 2 in ascending order
 * @deprecated
 */
protected function swapIfFirstIsLargerThanSecond(array $reg): array
{
    if ($reg[1] > $reg[2]) {
        // Exchange both values in a single step
        [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
    }

    return $reg;
}
||||||
1829 | |||||||
/**
 * Creates a new PageService instance on every call.
 */
protected function getPageService(): PageService
{
    $pageService = new PageService();

    return $pageService;
}
||||||
1834 | |||||||
/**
 * Returns the value of the maximumUrlsToCompile property.
 */
private function getMaximumUrlsToCompile(): int
{
    $maximumUrls = $this->maximumUrlsToCompile;

    return $maximumUrls;
}
||||||
1839 | |||||||
/**
 * Returns the backend user, taking it from $GLOBALS['BE_USER'] on first use.
 *
 * Backend authentication is initialized on every call, before the stored user is looked at.
 *
 * @return BackendUserAuthentication
 */
private function getBackendUser()
{
    // Make sure the _cli_ user is loaded
    Bootstrap::initializeBackendAuthentication();

    if ($this->backendUser !== null) {
        return $this->backendUser;
    }

    $this->backendUser = $GLOBALS['BE_USER'];

    return $this->backendUser;
}
||||||
1852 | |||||||
/**
 * Get querybuilder for given table
 *
 * @param string $table Name of the table the query builder is requested for
 * @return QueryBuilder
 */
private function getQueryBuilder(string $table)
{
    $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

    return $connectionPool->getQueryBuilderForTable($table);
}
||||||
1862 | } |
||||||
1863 |