Passed
Push — main ( fcc530...d42fc3 )
by Tomas Norre
28:03 queued 21:49
created

CrawlerController::getPageService()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
c 0
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Controller;
6
7
/***************************************************************
8
 *  Copyright notice
9
 *
10
 *  (c) 2020 AOE GmbH <[email protected]>
11
 *
12
 *  All rights reserved
13
 *
14
 *  This script is part of the TYPO3 project. The TYPO3 project is
15
 *  free software; you can redistribute it and/or modify
16
 *  it under the terms of the GNU General Public License as published by
17
 *  the Free Software Foundation; either version 3 of the License, or
18
 *  (at your option) any later version.
19
 *
20
 *  The GNU General Public License can be found at
21
 *  http://www.gnu.org/copyleft/gpl.html.
22
 *
23
 *  This script is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26
 *  GNU General Public License for more details.
27
 *
28
 *  This copyright notice MUST APPEAR in all copies of the script!
29
 ***************************************************************/
30
31
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
32
use AOE\Crawler\Converter\JsonCompatibilityConverter;
33
use AOE\Crawler\Crawler;
34
use AOE\Crawler\CrawlStrategy\CrawlStrategyFactory;
35
use AOE\Crawler\Domain\Model\Process;
36
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
37
use AOE\Crawler\Domain\Repository\ProcessRepository;
38
use AOE\Crawler\Domain\Repository\QueueRepository;
39
use AOE\Crawler\QueueExecutor;
40
use AOE\Crawler\Service\ConfigurationService;
41
use AOE\Crawler\Service\PageService;
42
use AOE\Crawler\Service\UrlService;
43
use AOE\Crawler\Service\UserService;
44
use AOE\Crawler\Utility\SignalSlotUtility;
45
use AOE\Crawler\Value\QueueFilter;
46
use PDO;
47
use Psr\Http\Message\UriInterface;
48
use Psr\Log\LoggerAwareInterface;
49
use Psr\Log\LoggerAwareTrait;
50
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
51
use TYPO3\CMS\Backend\Utility\BackendUtility;
52
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
53
use TYPO3\CMS\Core\Compatibility\PublicMethodDeprecationTrait;
54
use TYPO3\CMS\Core\Compatibility\PublicPropertyDeprecationTrait;
55
use TYPO3\CMS\Core\Core\Bootstrap;
56
use TYPO3\CMS\Core\Core\Environment;
57
use TYPO3\CMS\Core\Database\Connection;
58
use TYPO3\CMS\Core\Database\ConnectionPool;
59
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
60
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
61
use TYPO3\CMS\Core\Database\QueryGenerator;
62
use TYPO3\CMS\Core\Domain\Repository\PageRepository;
63
use TYPO3\CMS\Core\Exception\SiteNotFoundException;
64
use TYPO3\CMS\Core\Imaging\Icon;
65
use TYPO3\CMS\Core\Imaging\IconFactory;
66
use TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException;
67
use TYPO3\CMS\Core\Site\Entity\Site;
68
use TYPO3\CMS\Core\Type\Bitmask\Permission;
69
use TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser;
70
use TYPO3\CMS\Core\Utility\DebugUtility;
71
use TYPO3\CMS\Core\Utility\GeneralUtility;
72
use TYPO3\CMS\Core\Utility\MathUtility;
73
use TYPO3\CMS\Extbase\Object\ObjectManager;
74
75
/**
76
 * Class CrawlerController
77
 *
78
 * @package AOE\Crawler\Controller
79
 */
80
class CrawlerController implements LoggerAwareInterface
81
{
82
    use LoggerAwareTrait;
83
    use PublicMethodDeprecationTrait;
84
    use PublicPropertyDeprecationTrait;
85
86
    public const CLI_STATUS_NOTHING_PROCCESSED = 0;
87
88
    //queue not empty
89
    public const CLI_STATUS_REMAIN = 1;
90
91
    //(some) queue items were processed
92
    public const CLI_STATUS_PROCESSED = 2;
93
94
    //instance didn't finish
95
    public const CLI_STATUS_ABORTED = 4;
96
97
    public const CLI_STATUS_POLLABLE_PROCESSED = 8;
98
99
    /**
100
     * @var integer
101
     */
102
    public $setID = 0;
103
104
    /**
105
     * @var string
106
     */
107
    public $processID = '';
108
109
    /**
110
     * @var array
111
     */
112
    public $duplicateTrack = [];
113
114
    /**
115
     * @var array
116
     */
117
    public $downloadUrls = [];
118
119
    /**
120
     * @var array
121
     */
122
    public $incomingProcInstructions = [];
123
124
    /**
125
     * @var array
126
     */
127
    public $incomingConfigurationSelection = [];
128
129
    /**
130
     * @var bool
131
     */
132
    public $registerQueueEntriesInternallyOnly = false;
133
134
    /**
135
     * @var array
136
     */
137
    public $queueEntries = [];
138
139
    /**
140
     * @var array
141
     */
142
    public $urlList = [];
143
144
    /**
145
     * @var array
146
     */
147
    public $extensionSettings = [];
148
149
    /**
150
     * Mount Point
151
     *
152
     * @var bool
153
     * Todo: Check what this is used for and adjust the type hint or code, as bool doesn't match the current code.
154
     */
155
    public $MP = false;
156
157
    /**
158
     * @var string
159
     * @deprecated
160
     */
161
    protected $processFilename;
162
163
    /**
164
     * Holds the internal access mode can be 'gui','cli' or 'cli_im'
165
     *
166
     * @var string
167
     * @deprecated
168
     */
169
    protected $accessMode;
170
171
    /**
172
     * @var QueueRepository
173
     */
174
    protected $queueRepository;
175
176
    /**
177
     * @var ProcessRepository
178
     */
179
    protected $processRepository;
180
181
    /**
182
     * @var ConfigurationRepository
183
     */
184
    protected $configurationRepository;
185
186
    /**
187
     * @var string
188
     * @deprecated Since v9.2.5 - This will be removed in v10
189
     */
190
    protected $tableName = 'tx_crawler_queue';
191
192
    /**
193
     * @var QueueExecutor
194
     */
195
    protected $queueExecutor;
196
197
    /**
198
     * @var int
199
     */
200
    protected $maximumUrlsToCompile = 10000;
201
202
    /**
203
     * @var IconFactory
204
     */
205
    protected $iconFactory;
206
207
    /**
208
     * @var string[]
209
     */
210
    // Maps a method name to the deprecation message associated with it.
    // The 'hasGroupAccess' entry now names hasGroupAccess() itself; it previously
    // referred to getLogEntriesForPageId() because of a copy-paste slip.
    private $deprecatedPublicMethods = [
        'cleanUpOldQueueEntries' => 'Using CrawlerController::cleanUpOldQueueEntries() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->cleanUpOldQueueEntries() instead.',
        'CLI_debug' => 'Using CrawlerController->CLI_debug() is deprecated since 9.1.3 and will be removed in v11.x',
        'CLI_releaseProcesses' => 'Using CrawlerController->CLI_releaseProcesses() is deprecated since 9.2.2 and will be removed in v11.x',
        'CLI_runHooks' => 'Using CrawlerController->CLI_runHooks() is deprecated since 9.1.5 and will be removed in v11.x',
        'getAccessMode' => 'Using CrawlerController->getAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
        'getLogEntriesForPageId' => 'Using CrawlerController->getLogEntriesForPageId() is deprecated since 9.1.5 and will be remove in v11.x',
        'getLogEntriesForSetId' => 'Using crawlerController::getLogEntriesForSetId() is deprecated since 9.0.1 and will be removed in v11.x',
        'hasGroupAccess' => 'Using CrawlerController->hasGroupAccess() is deprecated since 9.2.2 and will be remove in v11.x, please use UserService::hasGroupAccess() instead.',
        'flushQueue' => 'Using CrawlerController::flushQueue() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->flushQueue() instead.',
        'setAccessMode' => 'Using CrawlerController->setAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
        'getDisabled' => 'Using CrawlerController->getDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->isDisabled() instead',
        'setDisabled' => 'Using CrawlerController->setDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->setDisabled() instead',
        'getProcessFilename' => 'Using CrawlerController->getProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
        'setProcessFilename' => 'Using CrawlerController->setProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
        'getDuplicateRowsIfExist' => 'Using CrawlerController->getDuplicateRowsIfExist() is deprecated since 9.1.4 and will be remove in v11.x, please use QueueRepository->getDuplicateQueueItemsIfExists() instead',
        'checkIfPageShouldBeSkipped' => 'Using CrawlerController->checkIfPageShouldBeSkipped() is deprecated since 9.2.5 and will be removed in v11.x'
    ];
228
229
    /**
230
     * @var string[]
231
     */
232
    // Maps a property name to the deprecation message associated with it.
    // The 'processFilename' message now names the processFilename property
    // (it previously repeated the accessMode text).
    private $deprecatedPublicProperties = [
        'accessMode' => 'Using CrawlerController->accessMode is deprecated since 9.1.3 and will be removed in v11.x',
        'processFilename' => 'Using CrawlerController->processFilename is deprecated since 9.1.3 and will be removed in v11.x',
    ];
236
237
    /**
238
     * @var BackendUserAuthentication|null
239
     */
240
    private $backendUser;
241
242
    /**
243
     * @var integer
244
     */
245
    private $scheduledTime = 0;
246
247
    /**
248
     * @var integer
249
     */
250
    private $reqMinute = 0;
251
252
    /**
253
     * @var bool
254
     */
255
    private $submitCrawlUrls = false;
256
257
    /**
258
     * @var bool
259
     */
260
    private $downloadCrawlUrls = false;
261
262
    /**
263
     * @var PageRepository
264
     */
265
    private $pageRepository;
266
267
    /**
268
     * @var Crawler
269
     */
270
    private $crawler;
271
272
    /************************************
273
     *
274
     * Getting URLs based on Page TSconfig
275
     *
276
     ************************************/
277
278 41
    /**
     * Instantiates the repositories and services this controller works with and
     * loads the extension settings.
     *
     * The settings 'countInARun', 'processLimit' and 'maxCompileUrls' are read with
     * a null-coalescing fallback so that an incomplete extension configuration does
     * not raise "undefined index" notices/warnings. The resulting values are the
     * same as before: a missing or zero 'countInARun' becomes 100, and the other two
     * are clamped via MathUtility::forceIntegerInRange() with defaults 1 and 10000.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $crawlStrategyFactory = GeneralUtility::makeInstance(CrawlStrategyFactory::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->configurationRepository = $objectManager->get(ConfigurationRepository::class);
        $this->pageRepository = GeneralUtility::makeInstance(PageRepository::class);
        $this->queueExecutor = GeneralUtility::makeInstance(QueueExecutor::class, $crawlStrategyFactory);
        $this->iconFactory = GeneralUtility::makeInstance(IconFactory::class);
        $this->crawler = GeneralUtility::makeInstance(Crawler::class);

        $this->processFilename = Environment::getVarPath() . '/lock/tx_crawler.proc';

        /** @var ExtensionConfigurationProvider $configurationProvider */
        $configurationProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $settings = $configurationProvider->getExtensionConfiguration();
        $this->extensionSettings = is_array($settings) ? $settings : [];

        // A missing key is treated like 0, which triggers the default below.
        if (MathUtility::convertToPositiveInteger($this->extensionSettings['countInARun'] ?? 0) === 0) {
            $this->extensionSettings['countInARun'] = 100;
        }

        $this->extensionSettings['processLimit'] = MathUtility::forceIntegerInRange(
            $this->extensionSettings['processLimit'] ?? 0,
            1,
            99,
            1
        );
        $this->setMaximumUrlsToCompile(
            MathUtility::forceIntegerInRange(
                $this->extensionSettings['maxCompileUrls'] ?? 0,
                1,
                1000000000,
                10000
            )
        );
    }
304
305 45
    /**
     * Stores the given value in $this->maximumUrlsToCompile.
     */
    public function setMaximumUrlsToCompile(int $maximumUrlsToCompile): void
    {
        $this->maximumUrlsToCompile = $maximumUrlsToCompile;
    }
309
310
    /**
     * Returns the current access mode ('gui', 'cli' or 'cli_im').
     *
     * @return string
     * @deprecated
     */
    public function getAccessMode()
    {
        return $this->accessMode;
    }
320
321
    /**
     * Stores the access mode ('gui', 'cli' or 'cli_im').
     *
     * @param string $accessMode
     * @deprecated
     */
    public function setAccessMode($accessMode): void
    {
        $this->accessMode = $accessMode;
    }
329
330
    /**
     * Toggles the "disabled" marker file at $this->processFilename.
     *
     * A truthy $disabled writes the file with the content 'disabled'; a falsy value
     * (false or null) removes the file if it exists.
     *
     * @deprecated
     */
    public function setDisabled(?bool $disabled = true): void
    {
        $markerFile = $this->processFilename;

        if ($disabled) {
            GeneralUtility::writeFile($markerFile, 'disabled');
            return;
        }

        if (is_file($markerFile)) {
            unlink($markerFile);
        }
    }
342
343
    /**
     * Reports whether the marker file at $this->processFilename exists.
     *
     * @deprecated
     */
    public function getDisabled(): bool
    {
        $markerFile = $this->processFilename;

        return is_file($markerFile);
    }
351
352
    /**
     * Overrides the path stored in $this->processFilename.
     *
     * @param string $filenameWithPath
     * @deprecated
     */
    public function setProcessFilename($filenameWithPath): void
    {
        $this->processFilename = $filenameWithPath;
    }
360
361
    /**
     * Returns the path stored in $this->processFilename.
     *
     * @return string
     * @deprecated
     */
    public function getProcessFilename()
    {
        return $this->processFilename;
    }
369
370
    /**
     * Replaces the extension settings held by this controller
     * (the unserialized counterpart of $TYPO3_CONF_VARS['EXT']['extConf']['crawler']).
     */
    public function setExtensionSettings(array $extensionSettings): void
    {
        $this->extensionSettings = $extensionSettings;
    }
377
378
    /**
     * Delegates to PageService::checkIfPageShouldBeSkipped() for the given page row.
     *
     * @return false|string false if the page should be crawled (not excluded), true / skipMessage if it should be skipped
     * @deprecated
     */
    public function checkIfPageShouldBeSkipped(array $pageRow)
    {
        return GeneralUtility::makeInstance(PageService::class)->checkIfPageShouldBeSkipped($pageRow);
    }
389
390
    /**
     * Wrapper around getUrlsForPageId() that first decides whether the page is skipped.
     * It returns an array of configurations and no urls!
     *
     * @param array $pageRow Page record with at least dok-type and uid columns.
     * @param string $skipMessage Set by reference: the reason the page was skipped, or '' when it was not.
     * @return array Configurations for the page; empty when the page is skipped or its uid is not an integer.
     * @see getUrlsForPageId()
     */
    public function getUrlsForPageRow(array $pageRow, &$skipMessage = '')
    {
        $pageUid = $pageRow['uid'];
        if (! is_int($pageUid)) {
            $skipMessage = 'PageUid ' . $pageUid . ' was not an integer';
            return [];
        }

        $skipReason = $this->getPageService()->checkIfPageShouldBeSkipped($pageRow);
        if ($skipReason !== false) {
            $skipMessage = $skipReason;
            return [];
        }

        $configurations = $this->getUrlsForPageId($pageUid);
        $skipMessage = '';

        return $configurations;
    }
417
418
    /**
     * Creates a list of URLs from input array (and submits them to queue if asked for)
     * See Web > Info module script + "indexed_search"'s crawler hook-client using this!
     *
     * Robustness notes:
     * - A missing 'URLs', 'procInstrFilter' or 'userGroups' entry is treated as absent/empty
     *   instead of raising an "undefined index" notice.
     * - A $reqMinute that is zero (or negative) no longer causes a division by zero; in that
     *   case no interleave is applied and every URL is scheduled at $scheduledTime.
     *
     * @param array $vv Information about URLs from pageRow to crawl.
     * @param array $pageRow Page row
     * @param int $scheduledTime Unix time to schedule indexing to, typically time()
     * @param int $reqMinute Number of requests per minute (creates the interleave between requests)
     * @param bool $submitCrawlUrls If set, submits the URLs to queue
     * @param bool $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
     * @param array $duplicateTrack Array which is passed by reference and contains an id per url to ensure we will not crawl duplicates
     * @param array $downloadUrls Array which will be filled with URLS for download if flag is set.
     * @param array $incomingProcInstructions Array of processing instructions
     * @return string List of URLs (meant for display in backend module)
     */
    public function urlListFromUrlArray(
        array $vv,
        array $pageRow,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array &$duplicateTrack,
        array &$downloadUrls,
        array $incomingProcInstructions
    ) {
        if (! is_array($vv['URLs'] ?? null)) {
            return 'ERROR - no URL generated';
        }
        $urlLog = [];
        $pageId = (int) $pageRow['uid'];
        $configurationHash = $this->getConfigurationHash($vv);
        $skipInnerCheck = $this->queueRepository->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageId, $configurationHash);

        $urlService = new UrlService();

        $procInstrFilter = $vv['subCfg']['procInstrFilter'] ?? '';
        $userGroups = $vv['subCfg']['userGroups'] ?? '';

        // The filter does not depend on the individual URL, so evaluate it once:
        // when it rejects the configuration, no URL would be listed at all.
        if (! $this->drawURLs_PIfilter($procInstrFilter, $incomingProcInstructions)) {
            return '';
        }

        // Seconds between two scheduled requests; guarded against division by zero.
        $requestsPerMinute = (float) $reqMinute;
        $secondsPerRequest = $requestsPerMinute > 0 ? 60 / $requestsPerMinute : 0;

        foreach ($vv['URLs'] as $urlQuery) {
            $url = (string) $urlService->getUrlFromPageAndQueryParameters(
                $pageId,
                $urlQuery,
                $vv['subCfg']['baseUrl'] ?? null,
                $vv['subCfg']['force_ssl'] ?? 0
            );

            // Create key by which to determine unique-ness:
            $uKey = $url . '|' . $userGroups . '|' . $procInstrFilter;

            if (isset($duplicateTrack[$uKey])) {
                // The url key is already registered: just display it and do not resubmit it
                $urlLog[] = '<em><span class="text-muted">' . htmlspecialchars($url) . '</span></em>';
            } else {
                // Scheduled time, rounded down to a full minute:
                $schTime = $scheduledTime + round(count($duplicateTrack) * $secondsPerRequest);
                $schTime = intval($schTime / 60) * 60;
                $formattedDate = BackendUtility::datetime($schTime);
                $this->urlList[] = '[' . $formattedDate . '] ' . $url;
                $urlList = '[' . $formattedDate . '] ' . htmlspecialchars($url);

                // Submit for crawling!
                if ($submitCrawlUrls) {
                    // NOTE(review): the un-interleaved $scheduledTime (not $schTime) is handed to
                    // addUrl(), exactly as before - confirm whether that is intended.
                    $added = $this->addUrl(
                        $pageId,
                        $url,
                        $vv['subCfg'],
                        $scheduledTime,
                        $configurationHash,
                        $skipInnerCheck
                    );
                    if ($added === false) {
                        $urlList .= ' (URL already existed)';
                    }
                } elseif ($downloadCrawlUrls) {
                    $downloadUrls[$url] = $url;
                }
                $urlLog[] = $urlList;
            }
            $duplicateTrack[$uKey] = true;
        }

        return implode('<br>', $urlLog);
    }
502
503
    /**
     * Tells whether the processing-instruction list matches the incoming instructions.
     *
     * With no incoming instructions everything is accepted; otherwise at least one of
     * them has to be contained in the comma-separated $piString.
     *
     * @param string $piString PI to test
     * @param array $incomingProcInstructions Processing instructions
     * @return boolean
     */
    public function drawURLs_PIfilter($piString, array $incomingProcInstructions)
    {
        if ($incomingProcInstructions === []) {
            return true;
        }

        foreach ($incomingProcInstructions as $instruction) {
            if (GeneralUtility::inList($piString, $instruction)) {
                return true;
            }
        }

        return false;
    }
523
524 9
    /**
     * Returns the page TSconfig for the given page id and lets registered hooks alter it.
     *
     * When $this->MP is set, the TSconfig is loaded for the page id found after the
     * '-' in $this->MP instead of $id. $this->MP is documented as bool but is handled
     * as a "<id>-<id>" string here, so it is cast explicitly to stay valid under
     * strict_types; a value without a second part falls back to page id 0.
     *
     * Hooks registered in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId']
     * receive 'pageId' and a reference to 'pageTSConfig'. A missing hook registration
     * no longer raises an "undefined index" notice.
     */
    public function getPageTSconfigForId(int $id): array
    {
        if (! $this->MP) {
            $pageTSconfig = BackendUtility::getPagesTSconfig($id);
        } else {
            $mountPointParts = explode('-', (string) $this->MP);
            $mountPointId = (int) ($mountPointParts[1] ?? 0);
            $pageTSconfig = BackendUtility::getPagesTSconfig($mountPointId);
        }

        // Call a hook to alter configuration
        $hooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId'] ?? null;
        if (is_array($hooks)) {
            $params = [
                'pageId' => $id,
                'pageTSConfig' => &$pageTSconfig,
            ];
            foreach ($hooks as $userFunc) {
                GeneralUtility::callUserFunction($userFunc, $params, $this);
            }
        }

        return $pageTSconfig;
    }
546
547
    /**
     * Collects the crawler configurations that apply to a page.
     * Adds no urls to the queue!
     *
     * Configurations come from two sources, in this order:
     *  1. page TSconfig (tx_crawler.crawlerCfg.paramSets.*), origin 'pagets'
     *  2. tx_crawler_configuration records from the rootline, origin
     *     'tx_crawler_configuration_<uid>'; these never overwrite a paramSet of the same name.
     *
     * Each entry of the result is keyed by configuration name and holds 'subCfg',
     * 'paramParsed', 'paramExpanded', 'URLs' and 'origin'. Finally the 'processUrls'
     * hooks may alter the result by reference.
     *
     * Missing 'pidsOnly' / parameter strings are treated as empty strings, and the
     * record's 'pidsonly' is cast to string, so neither notices nor a TypeError
     * (strict_types) are raised for incomplete configurations.
     */
    public function getUrlsForPageId(int $pageId): array
    {
        // Get page TSconfig for page ID
        $pageTSconfig = $this->getPageTSconfigForId($pageId);

        $res = [];

        // Fetch Crawler Configuration from pageTSconfig
        $crawlerCfg = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
        foreach ($crawlerCfg as $key => $values) {
            if (! is_array($values)) {
                continue;
            }
            $key = str_replace('.', '', $key);
            // Sub configuration for a single configuration string:
            $subCfg = (array) $crawlerCfg[$key . '.'];
            $subCfg['key'] = $key;

            if (strcmp($subCfg['procInstrFilter'] ?? '', '')) {
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']));
            }
            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // process configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, strval($pageId))) {
                continue;
            }

            // Explode, process etc.:
            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array($crawlerCfg[$key] ?? '');
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['origin'] = 'pagets';

            // recognize MP value
            if (! $this->MP) {
                $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $pageId]);
            } else {
                $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $pageId . '&MP=' . $this->MP]);
            }
        }

        // Get configuration from tx_crawler_configuration records up the rootline
        $crawlerConfigurations = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($pageId);
        foreach ($crawlerConfigurations as $configurationRecord) {
            // check access to the configuration record
            $hasAccess = empty($configurationRecord['begroups'])
                || $this->getBackendUser()->isAdmin()
                || UserService::hasGroupAccess($this->getBackendUser()->user['usergroup_cached_list'], $configurationRecord['begroups']);
            if (! $hasAccess) {
                continue;
            }

            $pidsOnly = (string) ($configurationRecord['pidsonly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // process configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, strval($pageId))) {
                continue;
            }

            $key = $configurationRecord['name'];

            // don't overwrite previously defined paramSets
            if (isset($res[$key])) {
                continue;
            }

            /* @var $TSparserObject TypoScriptParser */
            $TSparserObject = GeneralUtility::makeInstance(TypoScriptParser::class);
            $TSparserObject->parse($configurationRecord['processing_instruction_parameters_ts']);

            $subCfg = [
                'procInstrFilter' => $configurationRecord['processing_instruction_filter'],
                'procInstrParams.' => $TSparserObject->setup,
                'baseUrl' => $configurationRecord['base_url'],
                'force_ssl' => (int) $configurationRecord['force_ssl'],
                'userGroups' => $configurationRecord['fegroups'],
                'exclude' => $configurationRecord['exclude'],
                'key' => $key,
            ];

            // Skip the record when the current page is on its exclude list.
            if (in_array($pageId, $this->expandExcludeString($subCfg['exclude']), true)) {
                continue;
            }

            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array($configurationRecord['configuration']);
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $pageId]);
            $res[$key]['origin'] = 'tx_crawler_configuration_' . $configurationRecord['uid'];
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] ?? [] as $func) {
            $params = [
                'res' => &$res,
            ];
            GeneralUtility::callUserFunction($func, $params, $this);
        }

        return $res;
    }
645
646
    /**
     * Lists the names of all crawler configurations found for a page and its subpages.
     *
     * The names are gathered from the page TSconfig paramSets of $rootid and from the
     * configuration records the repository returns for the rootline pages plus the
     * pages of the tree below $rootid (down to $depth levels).
     *
     * TODO: Write Functional Tests
     */
    public function getConfigurationsForBranch(int $rootid, int $depth): array
    {
        $configurationNames = [];

        $paramSets = $this->getPageTSconfigForId($rootid)['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
        foreach ($paramSets as $setKey => $setValue) {
            if (is_array($setValue)) {
                // Strip a single trailing dot from the TypoScript key
                $configurationNames[] = substr($setKey, -1) === '.' ? substr($setKey, 0, -1) : $setKey;
            }
        }

        // Page ids of the rootline ...
        $pageIds = [];
        foreach (BackendUtility::BEgetRootLine($rootid) as $rootLinePage) {
            $pageIds[] = $rootLinePage['uid'];
        }

        // ... followed by the page ids of the tree below the root page
        /* @var PageTreeView $pageTree */
        $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
        $permsClause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
        $pageTree->init(empty($permsClause) ? '' : ('AND ' . $permsClause));
        $pageTree->getTree($rootid, $depth, '');
        foreach ($pageTree->tree as $treeNode) {
            $pageIds[] = $treeNode['row']['uid'];
        }

        $records = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($rootid, $pageIds);
        foreach ($records as $record) {
            $configurationNames[] = $record['name'];
        }

        return $configurationNames;
    }
682
683
    /**
     * Tells whether a user may access an item, based on group membership
     * (e.g. pass the group list of the current logged in user from $GLOBALS['TSFE']->gr_list).
     *
     * @param string $groupList Comma-separated list of (fe_)group UIDs from a user
     * @param string $accessList Comma-separated list of (fe_)group UIDs of the item to access
     * @return bool TRUE if the access list is empty or contains at least one of the user's group UIDs
     * @see \TYPO3\CMS\Frontend\Page\PageRepository::getMultipleGroupsWhereClause()
     * @deprecated
     * @codeCoverageIgnore
     */
    public function hasGroupAccess($groupList, $accessList)
    {
        // An empty access list does not restrict anything.
        $accessGranted = empty($accessList);

        if (! $accessGranted) {
            foreach (GeneralUtility::intExplode(',', $groupList) as $userGroupUid) {
                if (GeneralUtility::inList($accessList, $userGroupUid)) {
                    $accessGranted = true;
                    break;
                }
            }
        }

        return $accessGranted;
    }
706
707
    /**
     * Expands the parameter configuration into the individual values of each parameter.
     *
     * Syntax of a parameter value:
     * - If the value is wrapped in [...] it is expanded as described below, otherwise the value is taken literally
     * - The content of the brackets is split by "|"; the parts are processed individually and their results are added together
     * - For each part:
     *         - "[int]-[int]" = Integer range, expanded to all values in between, boundaries included, from low to high (at most 1000 values). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *           _ENABLELANG:1 picks only original records without their language overlays
     *           Further optional keys: _RECURSIVE (tree depth below the PID), _PIDFIELD (default "pid"),
     *           _FIELD (default "uid"), _WHERE (additional condition) and _ADDTABLE (additional FROM part)
     *         - Default: Literal value
     * - After each part the functions registered in
     *   $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] are called
     *
     * @param array $paramArray Array with key (GET var name) and values (value of GET var which is configuration for expansion)
     * @param int $pid Current page ID
     * @return array The parameter array where every value has been replaced by the list of its expanded values
     *
     * TODO: Write Functional Tests
     */
    public function expandParameters($paramArray, $pid)
    {
        // Traverse parameter names:
        foreach ($paramArray as $p => $v) {
            $v = trim($v);

            // A value that is not encapsulated in square brackets is literal and becomes the only value.
            if (! (strpos($v, '[') === 0 && substr($v, -1) === ']')) {
                $paramArray[$p] = [$v];
                continue;
            }

            // Take the value inside the brackets and reset the paramArray value as an array.
            $v = substr($v, 1, -1);
            $paramArray[$p] = [];

            // Explode parts and traverse them:
            $parts = explode('|', $v);
            foreach ($parts as $pV) {
                // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($pV), $reg)) {
                    $reg = $this->swapIfFirstIsLargerThanSecond($reg);

                    // Traverse range, add values. The brake limits the size of the range.
                    $runAwayBrake = 1000;
                    for ($a = $reg[1]; $a <= $reg[2]; $a++) {
                        $paramArray[$p][] = $a;
                        $runAwayBrake--;
                        if ($runAwayBrake <= 0) {
                            break;
                        }
                    }
                } elseif (strpos(trim($pV), '_TABLE:') === 0) {
                    // Parse parameters ("_KEY:value" pairs separated by ";"):
                    $subparts = GeneralUtility::trimExplode(';', $pV);
                    $subpartParams = [];
                    foreach ($subparts as $spV) {
                        // A sub part without ":" has no value; pad with null so the
                        // destructuring never reads a missing offset.
                        [$pKey, $pVal] = array_pad(GeneralUtility::trimExplode(':', $spV), 2, null);
                        $subpartParams[$pKey] = $pVal;
                    }

                    $tableName = $subpartParams['_TABLE'] ?? '';

                    // Table exists:
                    if (isset($GLOBALS['TCA'][$tableName])) {
                        $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : (int) $pid;
                        $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
                        $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
                        // NOTE(review): _WHERE and _ADDTABLE are handed to the query builder as they are
                        // written in the configuration; presumably only trusted users can edit it - confirm.
                        $where = $subpartParams['_WHERE'] ?? '';
                        $addTable = $subpartParams['_ADDTABLE'] ?? '';

                        // _FIELD is optional; fall back to "uid" when it is missing or empty.
                        $fieldName = ! empty($subpartParams['_FIELD']) ? $subpartParams['_FIELD'] : 'uid';
                        if ($fieldName === 'uid' || ! empty($GLOBALS['TCA'][$tableName]['columns'][$fieldName])) {
                            $queryBuilder = $this->getQueryBuilder($tableName);

                            if ($recursiveDepth > 0) {
                                /** @var QueryGenerator $queryGenerator */
                                $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
                                $pidList = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);
                                $pidArray = GeneralUtility::intExplode(',', $pidList);
                            } else {
                                $pidArray = [(string) $lookUpPid];
                            }

                            // Only deleted records are filtered out.
                            $queryBuilder->getRestrictions()
                                ->removeAll()
                                ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

                            $queryBuilder
                                ->select($fieldName)
                                ->from($tableName)
                                ->where(
                                    $queryBuilder->expr()->in($pidField, $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)),
                                    $where
                                );

                            if (! empty($addTable)) {
                                // TODO: Check if this works as intended!
                                $queryBuilder->add('from', $addTable);
                            }

                            // Not every table defines a translation pointer field.
                            $transOrigPointerField = $GLOBALS['TCA'][$tableName]['ctrl']['transOrigPointerField'] ?? '';
                            if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
                                $queryBuilder->andWhere(
                                    $queryBuilder->expr()->lte(
                                        $transOrigPointerField,
                                        0
                                    )
                                );
                            }

                            $statement = $queryBuilder->execute();

                            // Using the field value as array key removes duplicates while keeping the order.
                            $foundValues = [];
                            while ($row = $statement->fetch()) {
                                $foundValues[$row[$fieldName]] = true;
                            }

                            $paramArray[$p] = array_merge($paramArray[$p], array_keys($foundValues));
                        }
                    }
                } else {
                    // Just add value:
                    $paramArray[$p][] = $pV;
                }

                // Hook for processing own expandParameters place holder
                $expandHooks = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
                if (is_array($expandHooks)) {
                    $_params = [
                        'pObj' => $this,
                        'paramArray' => &$paramArray,
                        'currentKey' => $p,
                        'currentValue' => $pV,
                        'pid' => $pid,
                    ];
                    foreach ($expandHooks as $_funcRef) {
                        GeneralUtility::callUserFunction($_funcRef, $_params, $this);
                    }
                }
            }

            // Make unique set of values. The ksort() call orders the whole parameter array by
            // parameter name; the running foreach is not affected because it iterates by value.
            $paramArray[$p] = array_unique($paramArray[$p]);
            ksort($paramArray);
        }

        return $paramArray;
    }
854
855
    /**
     * Compiling URLs from parameter array (output of expandParameters())
     * The number of URLs will be the multiplication of the number of parameter values for each key,
     * capped on every level at the value returned by getMaximumUrlsToCompile().
     *
     * Each call handles the first parameter of $paramArray and recurses with the remaining ones.
     * A value that is an empty string adds nothing to the URL.
     *
     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
     * @param array $urls URLs accumulated in this array (for recursion)
     * @return array
     */
    public function compileUrls($paramArray, array $urls)
    {
        if (empty($paramArray)) {
            return $urls;
        }

        // Take the first parameter. array_slice() with preserved keys is used for the remainder
        // because array_shift() would renumber integer keys and thereby change the names of
        // numeric GET variables.
        reset($paramArray);
        $varName = key($paramArray);
        $valueSet = $paramArray[$varName];
        $remainingParams = array_slice($paramArray, 1, null, true);

        // PHP turns numeric array keys into integers; rawurlencode() needs a string.
        $encodedName = rawurlencode((string) $varName);
        $maximumUrls = $this->getMaximumUrlsToCompile();

        // Traverse value set:
        $newUrls = [];
        foreach ($urls as $url) {
            foreach ($valueSet as $val) {
                // Nothing more can be added once the limit is reached, so stop both loops.
                if (count($newUrls) >= $maximumUrls) {
                    break 2;
                }
                $value = (string) $val;
                $newUrls[] = $value === '' ? $url : $url . '&' . $encodedName . '=' . rawurlencode($value);
            }
        }

        return $this->compileUrls($remainingParams, $newUrls);
    }
882
883
    /************************************
884
     *
885
     * Crawler log
886
     *
887
     ************************************/
888
889
    /**
     * Return array of records from crawler queue for input page ID
     *
     * The records are ordered by "scheduled" (descending). The filter values "pending"
     * (exec_time = 0) and "finished" (exec_time > 0) narrow the selection; any other value
     * selects all entries of the page.
     *
     * @param int $id Page ID for which to look up log entries.
     * @param QueueFilter $queueFilter Filter that is compared (loosely) with 'pending' and 'finished'
     * @param bool $doFlush If TRUE, QueueRepository::flushQueue() is called with the filter before the entries are read
     * @param bool $doFullFlush Not used by this method; kept so the signature stays compatible
     * @param int $itemsPerPage Limit the amount of entries per page default is 10; values <= 0 disable the limit
     * @return array
     *
     * @deprecated
     */
    public function getLogEntriesForPageId($id, QueueFilter $queueFilter, $doFlush = false, $doFullFlush = false, $itemsPerPage = 10)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $queryBuilder
            ->select('*')
            ->from($this->tableName)
            ->where(
                $queryBuilder->expr()->eq('page_id', $queryBuilder->createNamedParameter($id, PDO::PARAM_INT))
            )
            ->orderBy('scheduled', 'DESC');

        // NOTE(review): the switch compares the QueueFilter object loosely with strings,
        // presumably through its string conversion - confirm against the QueueFilter class.
        switch ($queueFilter) {
            case 'pending':
                $queryBuilder->andWhere($queryBuilder->expr()->eq('exec_time', 0));
                break;
            case 'finished':
                $queryBuilder->andWhere($queryBuilder->expr()->gt('exec_time', 0));
                break;
        }

        if ($doFlush) {
            $this->queueRepository->flushQueue($queueFilter);
        }

        if ($itemsPerPage > 0) {
            $queryBuilder
                ->setMaxResults((int) $itemsPerPage);
        }

        return $queryBuilder->execute()->fetchAll();
    }
937
938
    /**
     * Return array of records from crawler queue for input set ID
     *
     * @param int $set_id Set ID for which to look up log entries.
     * @param string $filter Filter: "pending" => all that is not yet run, "finished" => all complete ones, anything else => all entries
     * @param bool $doFlush If TRUE, flushQueue() is called instead of reading entries and an empty array is returned
     * @param bool $doFullFlush Only used together with $doFlush: if TRUE, flushQueue() is called with an empty condition instead of the set/filter condition
     * @param int $itemsPerPage Limit the amount of entries per page default is 10; values <= 0 disable the limit
     * @return array
     *
     * @deprecated
     */
    public function getLogEntriesForSetId(int $set_id, string $filter = '', bool $doFlush = false, bool $doFullFlush = false, int $itemsPerPage = 10)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $queryBuilder
            ->select('*')
            ->from($this->tableName)
            ->where(
                $queryBuilder->expr()->eq('set_id', $queryBuilder->createNamedParameter($set_id, PDO::PARAM_INT))
            )
            ->orderBy('scheduled', 'DESC');

        $expressionBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getConnectionForTable($this->tableName)
            ->getExpressionBuilder();

        // $query collects, joined with AND, the conditions that are handed to flushQueue()
        // when $doFlush is set; the select above is filtered separately via $queryBuilder.
        $query = $expressionBuilder->andX();
        switch ($filter) {
            case 'pending':
                $queryBuilder->andWhere($queryBuilder->expr()->eq('exec_time', 0));
                $query->add($expressionBuilder->eq('exec_time', 0));
                break;
            case 'finished':
                $queryBuilder->andWhere($queryBuilder->expr()->gt('exec_time', 0));
                $query->add($expressionBuilder->gt('exec_time', 0));
                break;
        }

        if ($doFlush) {
            $addWhere = $query->add($expressionBuilder->eq('set_id', $set_id));
            $this->flushQueue($doFullFlush ? '' : $addWhere);
            return [];
        }

        if ($itemsPerPage > 0) {
            $queryBuilder
                ->setMaxResults($itemsPerPage);
        }

        return $queryBuilder->execute()->fetchAll();
    }
990
991
    /**
     * Inserts a call back entry into the crawler queue table
     * (called from hooks typically, see indexed search class "class.crawler.php").
     *
     * The call back reference is stored under the key "_CALLBACKOBJ" of the JSON encoded
     * parameters. A schedule of 0 is replaced by the current time.
     *
     * @param int $setId Set ID
     * @param array $params Parameters to pass to call back function; anything that is not an array is replaced by an empty array
     * @param string $callBack Call back object reference, eg. 'EXT:indexed_search/class.crawler.php:&tx_indexedsearch_crawler'
     * @param int $page_id Page ID to attach it to
     * @param int $schedule Time at which to activate
     */
    public function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0): void
    {
        $callBackParameters = is_array($params) ? $params : [];
        $callBackParameters['_CALLBACKOBJ'] = $callBack;

        $connection = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getConnectionForTable(QueueRepository::TABLE_NAME);

        $scheduledTime = (int) $schedule;
        $queueRecord = [
            'page_id' => (int) $page_id,
            'parameters' => json_encode($callBackParameters),
            'scheduled' => $scheduledTime !== 0 ? $scheduledTime : $this->getCurrentTime(),
            'exec_time' => 0,
            'set_id' => (int) $setId,
            'result_data' => '',
        ];

        $connection->insert(QueueRepository::TABLE_NAME, $queueRecord);
    }
1020
1021
    /************************************
1022
     *
1023
     * URL setting
1024
     *
1025
     ************************************/
1026
1027
    /**
     * Setting a URL for crawling:
     *
     * Builds the queue record for the URL and either registers it internally only
     * (when $this->registerQueueEntriesInternallyOnly is set) or inserts it into the queue
     * table, unless the duplication check reports an equal entry. A signal is emitted for an
     * inserted entry as well as for a detected duplicate.
     *
     * @param int $id Page ID
     * @param string $url Complete URL
     * @param array $subCfg Sub configuration array (from TS config); the keys "userGroups", "procInstrFilter", "procInstrParams." and "key" are read
     * @param int $tstamp Scheduled-time
     * @param string $configurationHash (optional) configuration hash
     * @param bool $skipInnerDuplicationCheck (optional) skip inner duplication check
     * @return bool TRUE only if a new entry has been inserted into the queue table
     */
    public function addUrl(
        $id,
        $url,
        array $subCfg,
        $tstamp,
        $configurationHash = '',
        $skipInnerDuplicationCheck = false
    ) {
        $urlAdded = false;
        $rows = [];

        // Creating parameters:
        $parameters = [
            'url' => $url,
        ];

        // fe user group simulation (the sub configuration does not necessarily define user groups):
        $uGs = implode(',', array_unique(GeneralUtility::intExplode(',', $subCfg['userGroups'] ?? '', true)));
        if ($uGs) {
            $parameters['feUserGroupList'] = $uGs;
        }

        // Setting processing instructions
        $parameters['procInstructions'] = GeneralUtility::trimExplode(',', $subCfg['procInstrFilter'] ?? '');
        $procInstrParams = $subCfg['procInstrParams.'] ?? null;
        if (is_array($procInstrParams)) {
            $parameters['procInstrParams'] = $procInstrParams;
        }

        // Compile value array:
        $parameters_serialized = json_encode($parameters);
        $fieldArray = [
            'page_id' => (int) $id,
            'parameters' => $parameters_serialized,
            'parameters_hash' => GeneralUtility::shortMD5($parameters_serialized),
            'configuration_hash' => $configurationHash,
            'scheduled' => $tstamp,
            'exec_time' => 0,
            'set_id' => (int) $this->setID,
            'result_data' => '',
            'configuration' => $subCfg['key'] ?? null,
        ];

        if ($this->registerQueueEntriesInternallyOnly) {
            // the entries will only be registered and not stored to the database
            $this->queueEntries[] = $fieldArray;

            return $urlAdded;
        }

        if (! $skipInnerDuplicationCheck) {
            // check if there is already an equal entry
            $rows = $this->queueRepository->getDuplicateQueueItemsIfExists(
                (bool) ($this->extensionSettings['enableTimeslot'] ?? false),
                $tstamp,
                $this->getCurrentTime(),
                $fieldArray['page_id'],
                $fieldArray['parameters_hash']
            );
        }

        if (empty($rows)) {
            $connectionForCrawlerQueue = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME);
            $connectionForCrawlerQueue->insert(
                QueueRepository::TABLE_NAME,
                $fieldArray
            );
            $uid = $connectionForCrawlerQueue->lastInsertId(QueueRepository::TABLE_NAME, 'qid');
            $urlAdded = true;

            $signalPayload = ['uid' => $uid, 'fieldArray' => $fieldArray];
            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_URL_ADDED_TO_QUEUE,
                $signalPayload
            );
        } else {
            $signalPayload = ['rows' => $rows, 'fieldArray' => $fieldArray];
            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_DUPLICATE_URL_IN_QUEUE,
                $signalPayload
            );
        }

        return $urlAdded;
    }
1123
1124
    /**
     * Returns the current system time as reported by time()
     *
     * @return int Unix timestamp
     */
    public function getCurrentTime()
    {
        $currentTimestamp = time();

        return $currentTimestamp;
    }
1133
1134
    /************************************
1135
     *
1136
     * URL reading
1137
     *
1138
     ************************************/
1139
1140
    /**
     * Read the URL of a single queue entry and persist the outcome on that entry.
     *
     * Steps: load the entry by qid, emit the pre-process signal, write exec_time
     * (plus process_id_completed when a process id is set) to lock the record,
     * run the entry through the queue executor, emit the post-process signal and
     * store the JSON-encoded executor result in result_data.
     *
     * @param int $queueId qid of the queue entry to process
     * @param bool $force If set, will process even if exec_time has been set!
     *
     * @return int|null 0 or self::CLI_STATUS_POLLABLE_PROCESSED; null when no matching queue entry was found
     */
    public function readUrl($queueId, $force = false)
    {
        $ret = 0;
        $this->logger->debug('crawler-readurl start ' . microtime(true));

        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
        $queryBuilder = $connectionPool->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
        $queryBuilder
            ->select('*')
            ->from(QueueRepository::TABLE_NAME)
            ->where(
                $queryBuilder->expr()->eq('qid', $queryBuilder->createNamedParameter($queueId, PDO::PARAM_INT))
            );
        if (! $force) {
            // Without $force only entries that are scheduled for a process and not yet executed qualify.
            $queryBuilder
                ->andWhere('exec_time = 0')
                ->andWhere('process_scheduled > 0');
        }
        $queueRec = $queryBuilder->execute()->fetch();

        if (! is_array($queueRec)) {
            // No (matching) queue entry: nothing to do.
            return null;
        }

        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_PREPROCESS,
            [$queueId, &$queueRec]
        );

        // Set exec_time to lock record:
        $field_array = ['exec_time' => $this->getCurrentTime()];

        if (isset($this->processID)) {
            // If multiprocessing is used we need to store the id of the process which has handled this entry
            $field_array['process_id_completed'] = $this->processID;
        }

        $queueConnection = $connectionPool->getConnectionForTable(QueueRepository::TABLE_NAME);
        $queueConnection->update(
            QueueRepository::TABLE_NAME,
            $field_array,
            ['qid' => (int) $queueId]
        );

        $result = $this->queueExecutor->executeQueueItem($queueRec, $this);

        // A missing or null 'content' means the request produced nothing to evaluate.
        // NOTE(review): the exact return shape of executeQueueItem() is not visible here;
        // "??" keeps this safe for results that carry no 'content' key.
        $content = $result['content'] ?? null;
        if ($content !== null) {
            /** @var JsonCompatibilityConverter $jsonCompatibilityConverter */
            $jsonCompatibilityConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
            $resultData = $jsonCompatibilityConverter->convert($content);

            // At the moment there is no need to point to specific pollable extensions
            $pollables = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pollSuccess'] ?? null;
            $procInstructions = is_array($resultData)
                ? ($resultData['parameters']['procInstructions'] ?? null)
                : null;

            if (is_array($pollables) && is_array($procInstructions)) {
                foreach ($pollables as $pollable) {
                    // Only check the success value if the instruction is running;
                    // it is important to name the pollSuccess key same as the procInstructions key
                    if (in_array($pollable, $procInstructions, true) && ! empty($resultData['success'][$pollable])) {
                        $ret |= self::CLI_STATUS_POLLABLE_PROCESSED;
                    }
                }
            }
        }

        // Set result in log which also denotes the end of the processing of this entry.
        $field_array = ['result_data' => json_encode($result)];

        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            [$queueId, &$field_array]
        );

        $queueConnection->update(
            QueueRepository::TABLE_NAME,
            $field_array,
            ['qid' => (int) $queueId]
        );

        $this->logger->debug('crawler-readurl stop ' . microtime(true));
        return $ret;
    }
1237
1238
    /**
     * Insert a queue entry that is not stored yet, execute it right away and
     * write the JSON-encoded result back to the newly created record.
     *
     * @param array $field_array Queue field array,
     *
     * @return array|bool|mixed|string Whatever the queue executor returned for the entry
     */
    public function readUrlFromArray($field_array)
    {
        // The record is inserted with exec_time already set, which locks it.
        $field_array['exec_time'] = $this->getCurrentTime();

        $queueConnection = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getConnectionForTable(QueueRepository::TABLE_NAME);
        $queueConnection->insert(QueueRepository::TABLE_NAME, $field_array);

        // Hand the freshly generated qid to the executor together with the other fields.
        $queueId = $queueConnection->lastInsertId(QueueRepository::TABLE_NAME, 'qid');
        $field_array['qid'] = $queueId;
        $result = $this->queueExecutor->executeQueueItem($field_array, $this);

        // Storing the result also denotes the end of the processing of this entry.
        $resultFields = ['result_data' => json_encode($result)];

        // Slots receive the fields by reference and may alter what gets stored.
        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            [$queueId, &$resultFields]
        );

        $queueConnection->update(QueueRepository::TABLE_NAME, $resultFields, ['qid' => $queueId]);

        return $result;
    }
1274
1275
    /*****************************
     *
     * Compiling URLs to crawl - tools
     *
     *****************************/
1280
1281
    /**
     * Walk the page tree below a root page and compile the table rows (one set
     * per page) describing the URLs each crawler configuration generates.
     *
     * The incoming settings are stored on the controller first because the row
     * renderer reads them from there.
     *
     * @param int $id Root page id to start from.
     * @param int $depth Depth of tree, 0=only id-page, 1= on sublevel, 99 = infinite
     * @param int $scheduledTime Unix Time when the URL is timed to be visited when put in queue
     * @param int $reqMinute Number of requests per minute (creates the interleave between requests)
     * @param bool $submitCrawlUrls If set, submits the URLs to queue in database (real crawling)
     * @param bool $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
     * @param array $incomingProcInstructions Array of processing instructions
     * @param array $configurationSelection Array of configuration keys
     * @return string Concatenated HTML table rows
     */
    public function getPageTreeAndUrls(
        $id,
        $depth,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array $incomingProcInstructions,
        array $configurationSelection
    ) {
        // Remember the request settings and start with clean tracking arrays.
        $this->scheduledTime = $scheduledTime;
        $this->reqMinute = $reqMinute;
        $this->submitCrawlUrls = $submitCrawlUrls;
        $this->downloadCrawlUrls = $downloadCrawlUrls;
        $this->incomingProcInstructions = $incomingProcInstructions;
        $this->incomingConfigurationSelection = $configurationSelection;
        $this->duplicateTrack = [];
        $this->downloadUrls = [];

        // Build the tree, restricted to pages the backend user may see.
        /** @var PageTreeView $pageTree */
        $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
        $permissionClause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
        $pageTree->init('AND ' . $permissionClause);

        // The root page itself becomes the first tree entry when it is accessible.
        $rootPage = BackendUtility::readPageAccess($id, $permissionClause);
        if (is_array($rootPage)) {
            $pageTree->tree[] = [
                'row' => $rootPage,
                'HTML' => $this->iconFactory->getIconForRecord('pages', $rootPage, Icon::SIZE_SMALL),
            ];
        }

        // Pages beneath the root are only collected for a non-zero depth.
        if ($depth) {
            $pageTree->getTree($id, $depth, '');
        }

        $rows = '';
        foreach ($pageTree->tree as $treeItem) {
            $this->MP = false;

            $isMountPoint = $treeItem['row']['doktype'] === PageRepository::DOKTYPE_MOUNTPOINT;
            if ($isMountPoint) {
                // NOTE(review): the result is indexed with [0] below - confirm that getPage()
                // returns a list of rows here rather than a single page row.
                $mountpage = $this->pageRepository->getPage($treeItem['row']['uid']);

                // MP parameter in the form "<mounted pid>-<mount point uid>"
                $this->MP = $mountpage[0]['mount_pid'] . '-' . $treeItem['row']['uid'];

                // Render the rows of every page in the mounted tree.
                $mountedTree = GeneralUtility::makeInstance(PageTreeView::class);
                $mountedTree->init('AND ' . $permissionClause);
                $mountedTree->getTree($mountpage[0]['mount_pid'], $depth);

                foreach ($mountedTree->tree as $mountedItem) {
                    $mountedTitle = $mountedItem['HTML']
                        . BackendUtility::getRecordTitle('pages', $mountedItem['row'], true);
                    $rows .= $this->drawURLs_addRowsForPage($mountedItem['row'], $mountedTitle);
                }

                if ($mountpage[0]['mount_pid_ol']) {
                    // With mount_pid_ol enabled the mounted page replaces the mount point page.
                    $treeItem['row']['uid'] = $mountpage[0]['mount_pid'];
                } else {
                    // Without mount_pid_ol the MP must not be used for the mount point page itself.
                    $this->MP = false;
                }
            }

            $pageTitle = $treeItem['HTML'] . BackendUtility::getRecordTitle('pages', $treeItem['row'], true);
            $rows .= $this->drawURLs_addRowsForPage($treeItem['row'], $pageTitle);
        }

        return $rows;
    }
1373
1374
    /**
     * Expands an exclude string into the list of page ids it covers.
     *
     * The string is a comma separated list of parts in the form "pid", "pid+"
     * or "pid+depth". A bare "pid" excludes only that page, "pid+" (or a depth
     * of 0 after the "+") uses depth 99, and "pid+depth" includes the sub pages
     * down to the given depth. Results are memoised in static caches for the
     * lifetime of the request.
     *
     * @param string $excludeString Exclude string
     * @return array Unique page ids (integers); keys are not re-indexed
     */
    public function expandExcludeString($excludeString)
    {
        // Internal static caches (shared by all calls within the request)
        static $expandedExcludeStringCache = [];
        static $treeCache = [];

        $cacheKey = (string) $excludeString;
        if (isset($expandedExcludeStringCache[$cacheKey])) {
            // isset() instead of empty(): an expansion that yields no pages is a valid cache hit too.
            return $expandedExcludeStringCache[$cacheKey];
        }

        $pidList = [];

        if (! empty($excludeString)) {
            /** @var PageTreeView $tree */
            $tree = GeneralUtility::makeInstance(PageTreeView::class);
            $tree->init('AND ' . $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW));

            foreach (GeneralUtility::trimExplode(',', $excludeString) as $excludePart) {
                $parts = GeneralUtility::trimExplode('+', $excludePart);
                $pid = (int) $parts[0];
                // A part without "+" has no second element; do not read an undefined offset.
                $rawDepth = $parts[1] ?? '';

                if (empty($rawDepth)) {
                    // Default is "page only" (depth 0); a "+" without a usable depth means "all levels".
                    $depth = strpos($excludePart, '+') !== false ? 99 : 0;
                } else {
                    $depth = (int) $rawDepth;
                }

                $pidList[] = $pid;

                if ($depth <= 0) {
                    continue;
                }

                if (! isset($treeCache[$pid][$depth])) {
                    $tree->reset();
                    // getTree() is handed integers rather than the raw string fragments.
                    $tree->getTree($pid, $depth);
                    $treeCache[$pid][$depth] = $tree->tree;
                }

                foreach ($treeCache[$pid][$depth] as $data) {
                    $pidList[] = (int) $data['row']['uid'];
                }
            }
        }

        $expandedExcludeStringCache[$cacheKey] = array_unique($pidList);

        return $expandedExcludeStringCache[$cacheKey];
    }
1425
1426
    /**
     * Create the rows for display of the page tree
     * For each page a number of rows are shown displaying GET variable configuration
     *
     * One table row is produced per allowed configuration of the page; pages
     * without any configuration get a single "No entries" row. As a side effect
     * the URL list helper is invoked with the controller's queue settings
     * (scheduled time, submit/download flags, duplicate tracking).
     *
     * @param array $pageRow Page record the rows are built for
     * @param string $pageTitle Title cell content, inserted into the markup as-is (no escaping happens here)
     * @return string HTML table rows
     */
    public function drawURLs_addRowsForPage(array $pageRow, string $pageTitle): string
    {
        $skipMessage = '';

        // Get list of configurations
        $configurations = $this->getUrlsForPageRow($pageRow, $skipMessage);
        $configurations = ConfigurationService::removeDisallowedConfigurations($this->incomingConfigurationSelection, $configurations);

        // Traverse parameter combinations:
        $c = 0;
        $content = '';
        if (! empty($configurations)) {
            foreach ($configurations as $confKey => $confArray) {
                // Title column: only the first row carries it, spanning all configuration rows
                if (! $c) {
                    $titleClm = '<td rowspan="' . count($configurations) . '">' . $pageTitle . '</td>';
                } else {
                    $titleClm = '';
                }

                // The expanded exclude list holds integers and the comparison is strict, so the
                // uid is cast: a uid delivered as string would otherwise never match.
                $excludedPids = $this->expandExcludeString($confArray['subCfg']['exclude'] ?? '');
                if (! in_array((int) $pageRow['uid'], $excludedPids, true)) {
                    // URL list:
                    $urlList = $this->urlListFromUrlArray(
                        $confArray,
                        $pageRow,
                        $this->scheduledTime,
                        $this->reqMinute,
                        $this->submitCrawlUrls,
                        $this->downloadCrawlUrls,
                        $this->duplicateTrack,
                        $this->downloadUrls,
                        // if empty the urls won't be filtered by processing instructions
                        $this->incomingProcInstructions
                    );

                    // Expanded parameters:
                    $paramExpanded = '';
                    $calcAccu = [];
                    $calcRes = 1;
                    foreach ($confArray['paramExpanded'] ?? [] as $gVar => $gVal) {
                        $valueCount = count($gVal);
                        $paramExpanded .= '
                            <tr>
                                <td>' . htmlspecialchars('&' . $gVar . '=') . '<br/>' .
                            '(' . $valueCount . ')' .
                            '</td>
                                <td nowrap="nowrap">' . nl2br(htmlspecialchars(implode(chr(10), $gVal))) . '</td>
                            </tr>
                        ';
                        // Number of URL combinations is the product of the value counts
                        $calcRes *= $valueCount;
                        $calcAccu[] = $valueCount;
                    }
                    $paramExpanded = '<table>' . $paramExpanded . '</table>';
                    $paramExpanded .= 'Comb: ' . implode('*', $calcAccu) . '=' . $calcRes;

                    // Options (both keys are optional in the sub configuration)
                    $optionValues = '';
                    if (! empty($confArray['subCfg']['userGroups'])) {
                        $optionValues .= 'User Groups: ' . $confArray['subCfg']['userGroups'] . '<br/>';
                    }
                    if (! empty($confArray['subCfg']['procInstrFilter'])) {
                        $optionValues .= 'ProcInstr: ' . $confArray['subCfg']['procInstrFilter'] . '<br/>';
                    }

                    // Compile row:
                    $content .= '
                        <tr>
                            ' . $titleClm . '
                            <td>' . htmlspecialchars($confKey) . '</td>
                            <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10) . '&', GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'] ?? [])))))) . '</td>
                            <td>' . $paramExpanded . '</td>
                            <td nowrap="nowrap">' . $urlList . '</td>
                            <td nowrap="nowrap">' . $optionValues . '</td>
                            <td nowrap="nowrap">' . DebugUtility::viewArray($confArray['subCfg']['procInstrParams.'] ?? null) . '</td>
                        </tr>';
                } else {
                    $content .= '<tr>
                            ' . $titleClm . '
                            <td>' . htmlspecialchars($confKey) . '</td>
                            <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td>
                        </tr>';
                }

                $c++;
            }
        } else {
            // No configuration applies; $skipMessage is handed to getUrlsForPageRow() above and
            // presumably filled with the reason there - confirm against its signature.
            $message = ! empty($skipMessage) ? ' (' . $skipMessage . ')' : '';

            // Compile row:
            $content .= '
                <tr>
                    <td>' . $pageTitle . '</td>
                    <td colspan="6"><em>No entries</em>' . $message . '</td>
                </tr>';
        }

        return $content;
    }
1529
1530
    /*****************************
     *
     * CLI functions
     *
     *****************************/
1535
1536
    /**
     * Running the functionality of the CLI (crawling URLs from queue)
     *
     * @param int $countInARun Number of queue entries requested from the queue repository
     * @param int $sleepTime Value handed to usleep() after each processed entry
     * @param int $sleepAfterFinish Value handed to sleep() once the batch is done
     * @return int Bit mask composed of the self::CLI_STATUS_* flags
     */
    public function CLI_run(int $countInARun, int $sleepTime, int $sleepAfterFinish): int
    {
        $result = 0;
        $counter = 0;

        // First, run hooks (each registered hook raises a deprecation notice):
        $hookClassNames = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];
        foreach ($hookClassNames as $hookClassName) {
            trigger_error(
                'This hook (crawler/cli_hooks) is deprecated since 9.1.5 and will be removed when dropping support for TYPO3 9LTS and 10LTS',
                E_USER_DEPRECATED
            );
            $hook = GeneralUtility::makeInstance($hookClassName);
            if (is_object($hook)) {
                $hook->crawler_init($this);
            }
        }

        // Clean up the queue
        $this->queueRepository->cleanupQueue();

        // Select entries:
        $rows = $this->queueRepository->fetchRecordsToBeCrawled($countInARun);
        if (empty($rows)) {
            // Nothing was processed, so no status flag is set.
            return $result;
        }

        $queueIds = [];
        foreach ($rows as $row) {
            $queueIds[] = $row['qid'];
        }

        $processId = $this->CLI_buildProcessId();

        // Save the number of assigned queue entries to determine how many have been processed later
        $assignedCount = $this->queueRepository->updateProcessIdAndSchedulerForQueueIds($queueIds, $processId);
        $this->processRepository->updateProcessAssignItemsCount($assignedCount, $processId);

        // Abort when not every selected entry could be assigned to this process.
        if ($assignedCount !== count($queueIds)) {
            return $result | self::CLI_STATUS_ABORTED;
        }

        foreach ($rows as $row) {
            $result |= $this->readUrl($row['qid']);
            $counter++;

            // Just to relax the system
            usleep($sleepTime);

            // If the crawler has been disabled since the start of the run we return right away
            // and do NOT mark the process as ended.
            if ($this->crawler->isDisabled()) {
                return $result | self::CLI_STATUS_ABORTED;
            }

            if (! $this->processRepository->isProcessActive($this->CLI_buildProcessId())) {
                // Possible timeout
                $this->CLI_debug('conflict / timeout (' . $this->CLI_buildProcessId() . ')');
                $result |= self::CLI_STATUS_ABORTED;
                break;
            }
        }

        sleep($sleepAfterFinish);

        if ($counter > 0) {
            $result |= self::CLI_STATUS_PROCESSED;
        }

        return $result;
    }
1609
1610
    /**
     * Activate hooks
     *
     * Instantiates every class registered under the crawler "cli_hooks"
     * configuration and calls its crawler_init() method with this controller.
     *
     * @deprecated
     */
    public function CLI_runHooks(): void
    {
        $hookClassNames = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];

        foreach ($hookClassNames as $hookClassName) {
            $hook = GeneralUtility::makeInstance($hookClassName);
            if (! is_object($hook)) {
                continue;
            }

            $hook->crawler_init($this);
        }
    }
1623
1624
    /**
     * Try to acquire a new process with the given id
     * also performs some auto-cleanup for orphan processes
     *
     * Active processes whose TTL lies in the past count as orphans; they do not
     * count against the process limit and are released afterwards.
     *
     * @param string $id identification string for the process
     * @return bool true if the process was registered, false if the limit is reached or no system pid is available
     * @todo preemption might not be the most elegant way to clean up
     */
    public function CLI_checkAndAcquireNewProcess($id)
    {
        $systemProcessId = getmypid();
        if (! $systemProcessId) {
            return false;
        }

        $activeProcesses = $this->processRepository->findAllActive();
        $now = $this->getCurrentTime();

        // Split the active processes into still-valid ones (counted) and expired ones (collected).
        $validProcessCount = 0;
        $expiredProcessIds = [];

        /** @var Process $activeProcess */
        foreach ($activeProcesses as $activeProcess) {
            if ($activeProcess->getTtl() < $now) {
                $expiredProcessIds[] = $activeProcess->getProcessId();
                continue;
            }

            $validProcessCount++;
        }

        // If there are less than allowed active processes then add a new one
        $acquired = $validProcessCount < (int) $this->extensionSettings['processLimit'];
        if ($acquired) {
            $this->processRepository->addProcess($id, $systemProcessId);
        }

        // Housekeeping runs regardless of whether the process could be acquired.
        $this->processRepository->deleteProcessesMarkedAsDeleted();
        $this->processRepository->markRequestedProcessesAsNotActive($expiredProcessIds);
        $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($expiredProcessIds);

        return $acquired;
    }
1668
1669
    /**
     * Release a process and the required resources
     *
     * @param mixed $releaseIds string with a single process-id or array with multiple process-ids
     * @return bool false if an empty array was passed (nothing to release), true otherwise
     * @deprecated
     */
    public function CLI_releaseProcesses($releaseIds)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);

        // Normalise a single process id to a one-element list.
        $releaseIds = is_array($releaseIds) ? $releaseIds : [$releaseIds];

        if (empty($releaseIds)) {
            // Nothing to release
            return false;
        }

        // Some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup;
        // this ensures that a single process can't mess up the entire process table.
        //
        // Mark all processes as deleted which have no "waiting" queue-entries and which are not active.

        // Release queue entries
        $queryBuilder->update(QueueRepository::TABLE_NAME, 'q');
        $queryBuilder->where(
            'q.process_id IN(SELECT p.process_id FROM tx_crawler_process as p WHERE p.active = 0)'
        );
        $queryBuilder->set('q.process_scheduled', 0);
        $queryBuilder->set('q.process_id', '');
        $queryBuilder->execute();

        // FIXME: Not entirely sure that this is equivalent to the previous version
        // The same builder is reused below, so the SET part of the first statement is dropped first.
        $queryBuilder->resetQueryPart('set');

        // Release process entries
        $queryBuilder->update(ProcessRepository::TABLE_NAME);
        $queryBuilder->where(
            $queryBuilder->expr()->eq('active', 0),
            'process_id IN(SELECT q.process_id FROM tx_crawler_queue as q WHERE q.exec_time = 0)'
        );
        $queryBuilder->set('system_process_id', 0);
        $queryBuilder->execute();

        $this->processRepository->markRequestedProcessesAsNotActive($releaseIds);
        $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($releaseIds);

        return true;
    }
1722
1723
    /**
     * Create a unique Id for the current process
     *
     * The id is generated once from the current microtime and then reused for
     * every later call on this controller.
     *
     * @return string the ID
     */
    public function CLI_buildProcessId()
    {
        if ($this->processID) {
            // Already generated
            return $this->processID;
        }

        $this->processID = GeneralUtility::shortMD5(microtime(true));

        return $this->processID;
    }
1735
1736
    /**
     * Prints a message to the stdout (only if debug-mode is enabled)
     *
     * Debug mode is the "processDebug" extension setting.
     *
     * @param string $msg the message
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_debug($msg): void
    {
        $debugEnabled = (int) $this->extensionSettings['processDebug'];
        if (! $debugEnabled) {
            return;
        }

        echo $msg . "\n";
        // Push the message out right away instead of waiting for the buffer to fill.
        flush();
    }
1750
1751
    /**
     * Cleans up entries that stayed for too long in the queue. These are:
     * - processed entries that are over 1.5 days in age
     * - scheduled entries that are over 7 days old
     *
     * The actual limits are read, in days, from the extension settings
     * "cleanUpProcessedAge" and "cleanUpScheduledAge"; fractional days are
     * supported. Missing or non-numeric settings are treated as 0 days.
     *
     * @deprecated
     */
    public function cleanUpOldQueueEntries(): void
    {
        // 24*60*60 Seconds in 24 hours
        $secondsPerDay = 86400;

        // Settings may arrive as strings. Casting first avoids arithmetic on non-numeric values,
        // and the final int cast keeps float notation out of the SQL condition below.
        $processedAgeInSeconds = (int) ((float) ($this->extensionSettings['cleanUpProcessedAge'] ?? 0) * $secondsPerDay);
        $scheduledAgeInSeconds = (int) ((float) ($this->extensionSettings['cleanUpScheduledAge'] ?? 0) * $secondsPerDay);

        $now = time();
        $condition = '(exec_time<>0 AND exec_time<' . ($now - $processedAgeInSeconds) . ') OR scheduled<=' . ($now - $scheduledAgeInSeconds);
        $this->flushQueue($condition);
    }
1768
1769
    /**
1770
     * Removes queue entries
1771
     *
1772
     * @param string $where SQL related filter for the entries which should be removed
1773
     *
1774
     * @deprecated
1775
     */
1776 5
    protected function flushQueue($where = ''): void
1777
    {
1778 5
        $realWhere = strlen((string) $where) > 0 ? $where : '1=1';
1779
1780 5
        $queryBuilder = $this->getQueryBuilder($this->tableName);
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be remove in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1780
        $queryBuilder = $this->getQueryBuilder(/** @scrutinizer ignore-deprecated */ $this->tableName);

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1781
1782
        $groups = $queryBuilder
1783 5
            ->selectLiteral('DISTINCT set_id')
1784 5
            ->from($this->tableName)
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be remove in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1784
            ->from(/** @scrutinizer ignore-deprecated */ $this->tableName)

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1785 5
            ->where($realWhere)
1786 5
            ->execute()
1787 5
            ->fetchAll();
1788 5
        if (is_array($groups)) {
0 ignored issues
show
introduced by
The condition is_array($groups) is always true.
Loading history...
1789 5
            foreach ($groups as $group) {
1790
                $subSet = $queryBuilder
1791 4
                    ->select('qid', 'set_id')
1792 4
                    ->from($this->tableName)
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be remove in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1792
                    ->from(/** @scrutinizer ignore-deprecated */ $this->tableName)

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1793 4
                    ->where(
1794 4
                        $realWhere,
1795 4
                        $queryBuilder->expr()->eq('set_id', $group['set_id'])
1796
                    )
1797 4
                    ->execute()
1798 4
                    ->fetchAll();
1799
1800 4
                $payLoad = ['subSet' => $subSet];
1801 4
                SignalSlotUtility::emitSignal(
0 ignored issues
show
Deprecated Code introduced by
The function AOE\Crawler\Utility\Sign...otUtility::emitSignal() has been deprecated. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1801
                /** @scrutinizer ignore-deprecated */ SignalSlotUtility::emitSignal(
Loading history...
1802 4
                    self::class,
1803 4
                    SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH,
1804
                    $payLoad
1805
                );
1806
            }
1807
        }
1808
1809
        $queryBuilder
1810 5
            ->delete($this->tableName)
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be removed in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1810
            ->delete(/** @scrutinizer ignore-deprecated */ $this->tableName)

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1811 5
            ->where($realWhere)
1812 5
            ->execute();
1813 5
    }
1814
1815
    /**
     * Collects the qids of unprocessed queue entries that duplicate the given entry.
     *
     * An entry counts as a duplicate when it has the same page_id and parameters_hash, has neither
     * an exec_time nor a process_id, and satisfies the schedule rule below:
     * - $tstamp is now or in the past: any entry scheduled up to the current time matches. With the
     *   "enableTimeslot" extension setting switched on, entries scheduled within +/-100 of the
     *   current time match as well.
     * - $tstamp is in the future: only entries scheduled for exactly $tstamp match.
     *
     * @param int $tstamp Scheduled time of the entry that is checked for duplicates
     * @param array $fieldArray Queue row; the keys "page_id" and "parameters_hash" are read
     *
     * @return array List of the qid values of all duplicates, empty when there are none
     * @deprecated
     */
    protected function getDuplicateRowsIfExist($tstamp, $fieldArray)
    {
        $rows = [];

        $currentTime = $this->getCurrentTime();

        // Use the repository constant (the same one passed to from()) rather than the deprecated
        // $tableName property, so the whole query refers to one table source.
        $queryBuilder = $this->getQueryBuilder(QueueRepository::TABLE_NAME);
        $queryBuilder
            ->select('qid')
            ->from(QueueRepository::TABLE_NAME);

        if ($tstamp <= $currentTime) {
            // The entry is scheduled with "now" (or earlier).
            // !empty() keeps the truthiness check but tolerates a missing setting key.
            if (! empty($this->extensionSettings['enableTimeslot'])) {
                $timeBegin = $currentTime - 100;
                $timeEnd = $currentTime + 100;
                $queryBuilder
                    ->where(
                        'scheduled BETWEEN ' . $timeBegin . ' AND ' . $timeEnd
                    )
                    ->orWhere(
                        $queryBuilder->expr()->lte('scheduled', $currentTime)
                    );
            } else {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->lte('scheduled', $currentTime)
                    );
            }
        } elseif ($tstamp > $currentTime) {
            // Entries with a timestamp in the future need to have the same schedule time.
            // $tstamp is caller-supplied and untyped, so it is bound as an integer parameter
            // instead of being placed into the SQL string directly.
            $queryBuilder
                ->where(
                    $queryBuilder->expr()->eq(
                        'scheduled',
                        $queryBuilder->createNamedParameter($tstamp, PDO::PARAM_INT)
                    )
                );
        }

        // Restrict to entries that were neither executed nor claimed by a process and that
        // target the same page with the same parameters.
        $queryBuilder
            ->andWhere('NOT exec_time')
            ->andWhere('NOT process_id')
            ->andWhere($queryBuilder->expr()->eq('page_id', $queryBuilder->createNamedParameter($fieldArray['page_id'], PDO::PARAM_INT)))
            ->andWhere($queryBuilder->expr()->eq('parameters_hash', $queryBuilder->createNamedParameter($fieldArray['parameters_hash'], PDO::PARAM_STR)));

        $statement = $queryBuilder->execute();

        while ($row = $statement->fetch()) {
            $rows[] = $row['qid'];
        }

        return $rows;
    }
1876
1877
    /**
     * Builds an md5 fingerprint of a configuration array.
     *
     * The "paramExpanded" and "URLs" entries are removed before the array is serialized,
     * so they do not influence the resulting hash.
     *
     * @return string
     */
    protected function getConfigurationHash(array $configuration)
    {
        unset($configuration['paramExpanded'], $configuration['URLs']);

        $serializedConfiguration = serialize($configuration);

        return md5($serializedConfiguration);
    }
1888
1889
    /**
     * Builds the URL for a page and query string by delegating to a freshly created UrlService.
     * If the page has a Site configuration, the URL can be built by using the Site instance.
     *
     * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl
     * @throws SiteNotFoundException
     * @throws InvalidRouteArgumentsException
     *
     * @deprecated Using CrawlerController::getUrlFromPageAndQueryParameters() is deprecated since 9.1.1 and will be removed in v11.x, please use UrlService->getUrlFromPageAndQueryParameters() instead.
     * @codeCoverageIgnore
     */
    protected function getUrlFromPageAndQueryParameters(int $pageId, string $queryString, ?string $alternativeBaseUrl, int $httpsOrHttp): UriInterface
    {
        return (new UrlService())->getUrlFromPageAndQueryParameters(
            $pageId,
            $queryString,
            $alternativeBaseUrl,
            $httpsOrHttp
        );
    }
1905
1906 1
    /**
     * Returns $reg with the values at index 1 and index 2 in ascending order.
     *
     * The two values are exchanged only when the one at index 1 is larger than the one at
     * index 2; every other entry of the array is left as it is.
     */
    protected function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        if ($reg[1] > $reg[2]) {
            // The right-hand side is evaluated first, so this exchanges both values in one step.
            [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
        }

        return $reg;
    }
1917
1918 7
    /**
     * Creates a new PageService instance on every call.
     */
    protected function getPageService(): PageService
    {
        $pageService = new PageService();

        return $pageService;
    }
1922
1923 9
    /**
     * Returns the value currently held in the maximumUrlsToCompile property.
     */
    private function getMaximumUrlsToCompile(): int
    {
        $maximumUrls = $this->maximumUrlsToCompile;

        return $maximumUrls;
    }
1927
1928
    /**
     * Returns the backend user held by this controller.
     *
     * Backend authentication is initialized on every call. When no backend user has been
     * stored on the controller yet, it is taken from $GLOBALS['BE_USER'] and kept for later calls.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::initializeBackendAuthentication();

        if ($this->backendUser !== null) {
            return $this->backendUser;
        }

        $this->backendUser = $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
1940
1941
    /**
     * Obtains a query builder for the given table from the ConnectionPool.
     *
     * @return QueryBuilder
     */
    private function getQueryBuilder(string $table)
    {
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
1950
}
1951