Passed
Pull Request — version9_bugfixes (#763)
by
unknown
17:09
created

CrawlerController::setProcessID()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 3
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Controller;
6
7
/***************************************************************
8
 *  Copyright notice
9
 *
10
 *  (c) 2020 AOE GmbH <[email protected]>
11
 *
12
 *  All rights reserved
13
 *
14
 *  This script is part of the TYPO3 project. The TYPO3 project is
15
 *  free software; you can redistribute it and/or modify
16
 *  it under the terms of the GNU General Public License as published by
17
 *  the Free Software Foundation; either version 3 of the License, or
18
 *  (at your option) any later version.
19
 *
20
 *  The GNU General Public License can be found at
21
 *  http://www.gnu.org/copyleft/gpl.html.
22
 *
23
 *  This script is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26
 *  GNU General Public License for more details.
27
 *
28
 *  This copyright notice MUST APPEAR in all copies of the script!
29
 ***************************************************************/
30
31
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
32
use AOE\Crawler\Converter\JsonCompatibilityConverter;
33
use AOE\Crawler\Crawler;
34
use AOE\Crawler\CrawlStrategy\CrawlStrategyFactory;
35
use AOE\Crawler\Domain\Model\Process;
36
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
37
use AOE\Crawler\Domain\Repository\ProcessRepository;
38
use AOE\Crawler\Domain\Repository\QueueRepository;
39
use AOE\Crawler\QueueExecutor;
40
use AOE\Crawler\Service\ConfigurationService;
41
use AOE\Crawler\Service\PageService;
42
use AOE\Crawler\Service\UrlService;
43
use AOE\Crawler\Utility\SignalSlotUtility;
44
use AOE\Crawler\Value\QueueFilter;
45
use PDO;
46
use Psr\Http\Message\UriInterface;
47
use Psr\Log\LoggerAwareInterface;
48
use Psr\Log\LoggerAwareTrait;
49
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
50
use TYPO3\CMS\Backend\Utility\BackendUtility;
51
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
52
use TYPO3\CMS\Core\Compatibility\PublicMethodDeprecationTrait;
53
use TYPO3\CMS\Core\Compatibility\PublicPropertyDeprecationTrait;
54
use TYPO3\CMS\Core\Core\Bootstrap;
55
use TYPO3\CMS\Core\Core\Environment;
56
use TYPO3\CMS\Core\Database\Connection;
57
use TYPO3\CMS\Core\Database\ConnectionPool;
58
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
59
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
60
use TYPO3\CMS\Core\Database\QueryGenerator;
61
use TYPO3\CMS\Core\Domain\Repository\PageRepository;
62
use TYPO3\CMS\Core\Exception\SiteNotFoundException;
63
use TYPO3\CMS\Core\Imaging\Icon;
64
use TYPO3\CMS\Core\Imaging\IconFactory;
65
use TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException;
66
use TYPO3\CMS\Core\Site\Entity\Site;
67
use TYPO3\CMS\Core\Type\Bitmask\Permission;
68
use TYPO3\CMS\Core\Utility\DebugUtility;
69
use TYPO3\CMS\Core\Utility\GeneralUtility;
70
use TYPO3\CMS\Core\Utility\MathUtility;
71
use TYPO3\CMS\Extbase\Object\ObjectManager;
72
73
/**
74
 * Class CrawlerController
75
 *
76
 * @package AOE\Crawler\Controller
77
 */
78
class CrawlerController implements LoggerAwareInterface
79
{
80
    use LoggerAwareTrait;
81
    use PublicMethodDeprecationTrait;
82
    use PublicPropertyDeprecationTrait;
83
84
    /**
85
     * @deprecated since 9.2.5 will be removed in v11.x
86
     */
87
    public const CLI_STATUS_NOTHING_PROCCESSED = 0;
88
89
    /**
90
     * queue not empty
91
     * @deprecated since 9.2.5 will be removed in v11.x
92
     */
93
    public const CLI_STATUS_REMAIN = 1;
94
95
    /**
96
     * (some) queue items were processed
97
     * @deprecated since 9.2.5 will be removed in v11.x
98
     */
99
    public const CLI_STATUS_PROCESSED = 2;
100
101
    /**
102
     * instance didn't finish
103
     * @deprecated since 9.2.5 will be removed in v11.x
104
     */
105
    public const CLI_STATUS_ABORTED = 4;
106
107
    /**
108
     * @deprecated since 9.2.5 will be removed in v11.x
109
     */
110
    public const CLI_STATUS_POLLABLE_PROCESSED = 8;
111
112
    /**
113
     * @var integer
114
     */
115
    public $setID = 0;
116
117
    /**
118
     * @var string
119
     */
120
    public $processID = '';
121
122
    /**
123
     * @var array
124
     */
125
    public $duplicateTrack = [];
126
127
    /**
128
     * @var array
129
     */
130
    public $downloadUrls = [];
131
132
    /**
133
     * @var array
134
     */
135
    public $incomingProcInstructions = [];
136
137
    /**
138
     * @var array
139
     */
140
    public $incomingConfigurationSelection = [];
141
142
    /**
143
     * @var bool
144
     */
145
    public $registerQueueEntriesInternallyOnly = false;
146
147
    /**
148
     * @var array
149
     */
150
    public $queueEntries = [];
151
152
    /**
153
     * @var array
154
     */
155
    public $urlList = [];
156
157
    /**
158
     * @var array
159
     */
160
    public $extensionSettings = [];
161
162
    /**
163
     * Mount Point
164
     *
165
     * @var bool
166
     * Todo: The code treats a truthy value as a "-"-separated string (see getPageTSconfigForId() and getUrlsForPageId()), so "bool" does not match; adjust the type hint or the code.
167
     */
168
    public $MP = false;
169
170
    /**
171
     * @var string
172
     * @deprecated
173
     */
174
    protected $processFilename;
175
176
    /**
177
     * Holds the internal access mode can be 'gui','cli' or 'cli_im'
178
     *
179
     * @var string
180
     * @deprecated
181
     */
182
    protected $accessMode;
183
184
    /**
185
     * @var QueueRepository
186
     */
187
    protected $queueRepository;
188
189
    /**
190
     * @var ProcessRepository
191
     */
192
    protected $processRepository;
193
194
    /**
195
     * @var ConfigurationRepository
196
     */
197
    protected $configurationRepository;
198
199
    /**
200
     * @var string
201
     * @deprecated Since v9.2.5 - This will be removed in v10
202
     */
203
    protected $tableName = 'tx_crawler_queue';
204
205
    /**
206
     * @var QueueExecutor
207
     */
208
    protected $queueExecutor;
209
210
    /**
211
     * @var int
212
     */
213
    protected $maximumUrlsToCompile = 10000;
214
215
    /**
216
     * @var IconFactory
217
     */
218
    protected $iconFactory;
219
220
    /**
     * Deprecation messages keyed by method name.
     *
     * NOTE(review): presumably consumed by PublicMethodDeprecationTrait (used by
     * this class) rather than referenced directly, which is why static analysis
     * reports the property as unused - confirm against the TYPO3 core trait.
     *
     * @var string[]
     */
    private $deprecatedPublicMethods = [
        'compileUrls' => 'Using CrawlerController->compileUrls() is deprecated since 9.2.5, and will be removed in v11.x',
        'cleanUpOldQueueEntries' => 'Using CrawlerController::cleanUpOldQueueEntries() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->cleanUpOldQueueEntries() instead.',
        'CLI_buildProcessId' => 'Using CrawlerController->CLI_buildProcessId() is deprecated since 9.2.5 and will be removed in v11.x',
        'CLI_checkAndAcquireNewProcess' => 'Using CrawlerController->CLI_checkAndAcquireNewProcess() is deprecated since 9.2.5 and will be removed in v11.x',
        'CLI_debug' => 'Using CrawlerController->CLI_debug() is deprecated since 9.1.3 and will be removed in v11.x',
        'CLI_releaseProcesses' => 'Using CrawlerController->CLI_releaseProcesses() is deprecated since 9.2.2 and will be removed in v11.x',
        'CLI_run' => 'Using CrawlerController->CLI_run() is deprecated since 9.2.2 and will be removed in v11.x',
        'CLI_runHooks' => 'Using CrawlerController->CLI_runHooks() is deprecated since 9.1.5 and will be removed in v11.x',
        'expandExcludeString' => 'Using CrawlerController->expandExcludeString() is deprecated since 9.2.5 and will be removed in v11.x',
        'getAccessMode' => 'Using CrawlerController->getAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
        'getLogEntriesForPageId' => 'Using CrawlerController->getLogEntriesForPageId() is deprecated since 9.1.5 and will be removed in v11.x',
        'getLogEntriesForSetId' => 'Using crawlerController::getLogEntriesForSetId() is deprecated since 9.0.1 and will be removed in v11.x',
        // Previously this message named getLogEntriesForPageId() by mistake.
        'hasGroupAccess' => 'Using CrawlerController->hasGroupAccess() is deprecated since 9.2.2 and will be removed in v11.x, please use UserService::hasGroupAccess() instead.',
        'flushQueue' => 'Using CrawlerController::flushQueue() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->flushQueue() instead.',
        'setAccessMode' => 'Using CrawlerController->setAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
        'getDisabled' => 'Using CrawlerController->getDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->isDisabled() instead',
        'setDisabled' => 'Using CrawlerController->setDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->setDisabled() instead',
        'getProcessFilename' => 'Using CrawlerController->getProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
        'setProcessFilename' => 'Using CrawlerController->setProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
        'getDuplicateRowsIfExist' => 'Using CrawlerController->getDuplicateRowsIfExist() is deprecated since 9.1.4 and will be removed in v11.x, please use QueueRepository->getDuplicateQueueItemsIfExists() instead',
        'checkIfPageShouldBeSkipped' => 'Using CrawlerController->checkIfPageShouldBeSkipped() is deprecated since 9.2.5 and will be removed in v11.x',
        'swapIfFirstIsLargerThanSecond' => 'Using CrawlerController->swapIfFirstIsLargerThanSecond() is deprecated since 9.2.5, and will be removed in v11.x',
        'expandParameters' => 'Using CrawlerController->expandParameters() is deprecated since 9.2.5, and will be removed in v11.x',
    ];
248
249
    /**
     * Deprecation messages keyed by property name.
     *
     * NOTE(review): presumably consumed by PublicPropertyDeprecationTrait (used
     * by this class) - confirm against the TYPO3 core trait.
     *
     * @var string[]
     */
    private $deprecatedPublicProperties = [
        'accessMode' => 'Using CrawlerController->accessMode is deprecated since 9.1.3 and will be removed in v11.x',
        // Previously this message named accessMode by mistake.
        'processFilename' => 'Using CrawlerController->processFilename is deprecated since 9.1.3 and will be removed in v11.x',
    ];
256
257
    /**
258
     * @var BackendUserAuthentication|null
259
     */
260
    private $backendUser;
261
262
    /**
263
     * @var integer
264
     */
265
    private $scheduledTime = 0;
266
267
    /**
268
     * @var integer
269
     */
270
    private $reqMinute = 0;
271
272
    /**
273
     * @var bool
274
     */
275
    private $submitCrawlUrls = false;
276
277
    /**
278
     * @var bool
279
     */
280
    private $downloadCrawlUrls = false;
281
282
    /**
283
     * @var PageRepository
284
     */
285
    private $pageRepository;
286
287
    /**
288
     * @var Crawler
289
     */
290
    private $crawler;
291
292
    /**
293
     * @var ConfigurationService
294
     */
295
    private $configurationService;
296
297
    /**
298
     * @var UrlService
299
     */
300
    private $urlService;
301
302
    /************************************
303
     *
304
     * Getting URLs based on Page TSconfig
305
     *
306
     ************************************/
307
308
    /**
     * Instantiates the repositories and services the controller works with and
     * normalises the extension settings.
     *
     * Settings handling:
     * - a non-array configuration is replaced by an empty array,
     * - "countInARun" falls back to 100 when it converts to 0,
     * - "processLimit" is forced into the range 1..99 (default 1),
     * - "maxCompileUrls" is forced into the range 1..1000000000 (default 10000).
     *
     * Each key is read with a null-coalescing fallback so that a missing key
     * (e.g. when the settings were just defaulted to an empty array) does not
     * raise an "undefined index" notice before its default is applied.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $crawlStrategyFactory = GeneralUtility::makeInstance(CrawlStrategyFactory::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->configurationRepository = $objectManager->get(ConfigurationRepository::class);
        $this->pageRepository = GeneralUtility::makeInstance(PageRepository::class);
        $this->queueExecutor = GeneralUtility::makeInstance(QueueExecutor::class, $crawlStrategyFactory);
        $this->iconFactory = GeneralUtility::makeInstance(IconFactory::class);
        $this->crawler = GeneralUtility::makeInstance(Crawler::class);
        $this->configurationService = GeneralUtility::makeInstance(ConfigurationService::class);
        $this->urlService = GeneralUtility::makeInstance(UrlService::class);

        $this->processFilename = Environment::getVarPath() . '/lock/tx_crawler.proc';

        /** @var ExtensionConfigurationProvider $configurationProvider */
        $configurationProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $settings = $configurationProvider->getExtensionConfiguration();
        $this->extensionSettings = is_array($settings) ? $settings : [];

        if (MathUtility::convertToPositiveInteger($this->extensionSettings['countInARun'] ?? 0) === 0) {
            $this->extensionSettings['countInARun'] = 100;
        }

        $this->extensionSettings['processLimit'] = MathUtility::forceIntegerInRange(
            $this->extensionSettings['processLimit'] ?? 0,
            1,
            99,
            1
        );
        $this->setMaximumUrlsToCompile(
            MathUtility::forceIntegerInRange(
                $this->extensionSettings['maxCompileUrls'] ?? 0,
                1,
                1000000000,
                10000
            )
        );
    }
336
337
    /**
     * Stores the upper bound for the number of URLs to compile.
     *
     * The constructor calls this with the "maxCompileUrls" extension setting.
     */
    public function setMaximumUrlsToCompile(int $maximumUrlsToCompile): void
    {
        $this->maximumUrlsToCompile = $maximumUrlsToCompile;
    }
341
342
    /**
     * Returns the internal access mode ('gui', 'cli' or 'cli_im' according to
     * the property documentation).
     *
     * @return string
     * @deprecated
     */
    public function getAccessMode()
    {
        return $this->accessMode;
    }
352
353
    /**
     * Stores the internal access mode.
     *
     * @param string $accessMode Value to store; no validation is performed here
     * @deprecated
     */
    public function setAccessMode($accessMode): void
    {
        $this->accessMode = $accessMode;
    }
361
362
    /**
     * Toggles the "disabled" marker file that getDisabled() checks for.
     *
     * A truthy $disabled writes the string 'disabled' to the process file;
     * otherwise the process file is deleted if it exists.
     *
     * @deprecated
     */
    public function setDisabled(?bool $disabled = true): void
    {
        $markerFile = $this->processFilename;

        if ($disabled) {
            GeneralUtility::writeFile($markerFile, 'disabled');
            return;
        }

        if (is_file($markerFile)) {
            unlink($markerFile);
        }
    }
374
375
    /**
     * Reports the disabled status: true when the process file exists.
     *
     * @deprecated
     */
    public function getDisabled(): bool
    {
        $markerFile = $this->processFilename;

        return is_file($markerFile);
    }
383
384
    /**
     * Overrides the path of the process file used by setDisabled() and
     * getDisabled().
     *
     * @param string $filenameWithPath File name including its path
     * @deprecated
     */
    public function setProcessFilename($filenameWithPath): void
    {
        $this->processFilename = $filenameWithPath;
    }
392
393
    /**
     * Stores the given process ID on the controller.
     *
     * @param string $processID Identifier to store in $this->processID
     * @deprecated
     */
    public function setProcessID($processID): void
    {
        $this->processID = $processID;
    }
401
402
    /**
     * Returns the path of the process file used by setDisabled() and
     * getDisabled().
     *
     * @return string
     * @deprecated
     */
    public function getProcessFilename()
    {
        return $this->processFilename;
    }
410
411
    /**
     * Replaces the extension settings (unserialized pendant of
     * $TYPO3_CONF_VARS['EXT']['extConf']['crawler']).
     *
     * The array is stored as given; the defaults and range checks applied in
     * the constructor are not re-applied here.
     */
    public function setExtensionSettings(array $extensionSettings): void
    {
        $this->extensionSettings = $extensionSettings;
    }
418
419
    /**
     * Asks the PageService whether the given page should be skipped.
     *
     * Pure delegation to PageService::checkIfPageShouldBeSkipped().
     *
     * @return false|string false if the page should be crawled (not excluded), true / skipMessage if it should be skipped
     * @deprecated
     */
    public function checkIfPageShouldBeSkipped(array $pageRow)
    {
        return GeneralUtility::makeInstance(PageService::class)->checkIfPageShouldBeSkipped($pageRow);
    }
430
431
    /**
     * Wrapper around getUrlsForPageId() that first checks whether the page may
     * be crawled. It returns an array of configurations and no urls!
     *
     * @param array $pageRow Page record with at least dok-type and uid columns.
     * @param string $skipMessage Passed by reference: receives the reason the page
     *                            was skipped, or an empty string when it was not.
     * @return array Configurations for the page; empty when the page is skipped
     *               or its uid is not an integer.
     * @see getUrlsForPageId()
     */
    public function getUrlsForPageRow(array $pageRow, &$skipMessage = '')
    {
        $pageUid = $pageRow['uid'];

        // Guard: getUrlsForPageId() is declared with an int parameter.
        if (! is_int($pageUid)) {
            $skipMessage = 'PageUid ' . $pageUid . ' was not an integer';
            return [];
        }

        $skipReason = $this->getPageService()->checkIfPageShouldBeSkipped($pageRow);
        if ($skipReason !== false) {
            $skipMessage = $skipReason;
            return [];
        }

        $configurations = $this->getUrlsForPageId($pageUid);
        $skipMessage = '';

        return $configurations;
    }
458
459
    /**
     * Creates a list of URLs from input array (and submits them to queue if asked for)
     * See Web > Info module script + "indexed_search"'s crawler hook-client using this!
     *
     * For every URL query in $vv['URLs'] the full URL is built via UrlService.
     * URLs whose uniqueness key (URL, user groups, processing instruction
     * filter) is already in $duplicateTrack are only listed (muted); new ones
     * are appended to $this->urlList and either submitted to the queue or
     * collected in $downloadUrls, depending on the flags.
     *
     * @param array $vv Information about URLs from pageRow to crawl.
     * @param array $pageRow Page row
     * @param int $scheduledTime Unix time to schedule indexing to, typically time()
     * @param int $reqMinute Number of requests per minute (creates the interleave between requests).
     *                       A value of zero or less disables the interleave instead of dividing by zero.
     * @param bool $submitCrawlUrls If set, submits the URLs to queue
     * @param bool $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
     * @param array $duplicateTrack Array which is passed by reference and contains the an id per url to secure we will not crawl duplicates
     * @param array $downloadUrls Array which will be filled with URLS for download if flag is set.
     * @param array $incomingProcInstructions Array of processing instructions
     * @return string List of URLs (meant for display in backend module)
     */
    public function urlListFromUrlArray(
        array $vv,
        array $pageRow,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array &$duplicateTrack,
        array &$downloadUrls,
        array $incomingProcInstructions
    ) {
        if (! is_array($vv['URLs'] ?? null)) {
            return 'ERROR - no URL generated';
        }
        $urlLog = [];
        $pageId = (int) $pageRow['uid'];
        $configurationHash = $this->getConfigurationHash($vv);
        $skipInnerCheck = $this->queueRepository->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageId, $configurationHash);

        $urlService = new UrlService();

        // Missing sub-configuration keys are treated as empty strings rather
        // than raising an "undefined index" notice.
        $procInstrFilter = $vv['subCfg']['procInstrFilter'] ?? '';
        $userGroups = $vv['subCfg']['userGroups'] ?? '';

        // The filter depends only on the configuration and the incoming
        // instructions, not on the individual URL, so it is evaluated once.
        if (! $this->drawURLs_PIfilter($procInstrFilter, $incomingProcInstructions)) {
            return '';
        }

        // Seconds between two consecutive requests. A rate of zero (or less)
        // would divide by zero, so no interleave is applied in that case.
        $secondsPerRequest = $reqMinute > 0 ? 60 / $reqMinute : 0;

        foreach ($vv['URLs'] as $urlQuery) {
            $url = (string) $urlService->getUrlFromPageAndQueryParameters(
                $pageId,
                $urlQuery,
                $vv['subCfg']['baseUrl'] ?? null,
                $vv['subCfg']['force_ssl'] ?? 0
            );

            // Create key by which to determine unique-ness:
            $uKey = $url . '|' . $userGroups . '|' . $procInstrFilter;

            if (isset($duplicateTrack[$uKey])) {
                // If the URL key is already registered, just display it and do not resubmit it
                $urlLog[] = '<em><span class="text-muted">' . htmlspecialchars($url) . '</span></em>';
            } else {
                // Scheduled time, rounded down to the full minute:
                $schTime = $scheduledTime + round(count($duplicateTrack) * $secondsPerRequest);
                $schTime = intval($schTime / 60) * 60;
                $formattedDate = BackendUtility::datetime($schTime);
                $this->urlList[] = '[' . $formattedDate . '] ' . $url;
                $urlList = '[' . $formattedDate . '] ' . htmlspecialchars($url);

                // Submit for crawling!
                if ($submitCrawlUrls) {
                    // NOTE(review): the queue entry is added with $scheduledTime,
                    // not the interleaved $schTime shown in the list - confirm
                    // whether that is intended.
                    $added = $this->addUrl(
                        $pageId,
                        $url,
                        $vv['subCfg'],
                        $scheduledTime,
                        $configurationHash,
                        $skipInnerCheck
                    );
                    if ($added === false) {
                        $urlList .= ' (URL already existed)';
                    }
                } elseif ($downloadCrawlUrls) {
                    $downloadUrls[$url] = $url;
                }
                $urlLog[] = $urlList;
            }
            $duplicateTrack[$uKey] = true;
        }

        return implode('<br>', $urlLog);
    }
543
544
    /**
     * Tells whether at least one incoming processing instruction is contained
     * in the given comma-separated list. An empty set of incoming instructions
     * always passes.
     *
     * @param string $piString PI to test
     * @param array $incomingProcInstructions Processing instructions
     * @return boolean
     */
    public function drawURLs_PIfilter($piString, array $incomingProcInstructions)
    {
        if (count($incomingProcInstructions) === 0) {
            return true;
        }

        foreach ($incomingProcInstructions as $instruction) {
            if (! GeneralUtility::inList($piString, $instruction)) {
                continue;
            }
            return true;
        }

        return false;
    }
564
565
    /**
     * Loads the page TSconfig for a page and lets registered hooks alter it.
     *
     * When $this->MP holds a string of the form "<a>-<b>", the TSconfig is
     * loaded for page <b> instead of $id. Any other value of $this->MP (false,
     * a non-string, or a string without "-") falls back to $id; previously a
     * non-string truthy value raised a TypeError in explode() under
     * strict_types and a string without "-" produced an undefined offset.
     *
     * Hooks registered under
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId']
     * receive 'pageId' (always the original $id) and 'pageTSConfig' (by
     * reference) and may modify the configuration in place.
     *
     * @param int $id Page uid
     * @return array Page TSconfig, possibly altered by hooks
     */
    public function getPageTSconfigForId(int $id): array
    {
        $tsConfigPageId = $id;
        if (is_string($this->MP) && $this->MP !== '') {
            $mountPointParts = explode('-', $this->MP);
            if (isset($mountPointParts[1])) {
                // Cast so getPagesTSconfig() receives an integer page id.
                $tsConfigPageId = (int) $mountPointParts[1];
            }
        }
        $pageTSconfig = BackendUtility::getPagesTSconfig($tsConfigPageId);

        // Call a hook to alter configuration
        $hooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId'] ?? null;
        if (is_array($hooks)) {
            $params = [
                'pageId' => $id,
                'pageTSConfig' => &$pageTSconfig,
            ];
            foreach ($hooks as $userFunc) {
                GeneralUtility::callUserFunction($userFunc, $params, $this);
            }
        }
        return $pageTSconfig;
    }
587
588
    /**
     * Collects the crawler configurations that apply to a page.
     * This method returns an array of configurations and adds no urls!
     *
     * Configurations come from the page TSconfig first, then from the
     * ConfigurationService database lookup; finally every hook registered under
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] may alter
     * the result through the by-reference 'res' parameter.
     */
    public function getUrlsForPageId(int $pageId): array
    {
        // Get page TSconfig for page ID
        $pageTSconfig = $this->getPageTSconfigForId($pageId);

        $mountPoint = '';
        if (is_string($this->MP)) {
            $mountPoint = $this->MP;
        }

        $configurations = [];

        // Fetch Crawler Configuration from pageTSConfig
        $configurations = $this->configurationService->getConfigurationFromPageTS(
            $pageTSconfig,
            $pageId,
            $configurations,
            $mountPoint
        );

        // Get configuration from tx_crawler_configuration records up the rootline
        $configurations = $this->configurationService->getConfigurationFromDatabase($pageId, $configurations);

        $hooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] ?? [];
        foreach ($hooks as $hook) {
            $params = [
                'res' => &$configurations,
            ];
            GeneralUtility::callUserFunction($hook, $params, $this);
        }

        return $configurations;
    }
615
616
    /**
     * Find all configurations of subpages of a page
     *
     * The result contains the names of the "paramSets." entries in the page
     * TSconfig of $rootid (array-valued entries only, with one trailing dot
     * stripped), followed by the names of the configuration records the
     * repository returns for the rootline and page tree of $rootid.
     *
     * TODO: Write Functional Tests
     */
    public function getConfigurationsForBranch(int $rootid, int $depth): array
    {
        $names = [];

        $pageTSconfig = $this->getPageTSconfigForId($rootid);
        $paramSets = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
        foreach ($paramSets as $setKey => $setValue) {
            if (is_array($setValue)) {
                // Drop exactly one trailing dot from the key, if present.
                if (substr($setKey, -1) === '.') {
                    $setKey = substr($setKey, 0, -1);
                }
                $names[] = $setKey;
            }
        }

        // Page ids of the rootline first ...
        $pageIds = [];
        foreach (BackendUtility::BEgetRootLine($rootid) as $rootLinePage) {
            $pageIds[] = $rootLinePage['uid'];
        }

        // ... followed by the pages of the tree below $rootid.
        /* @var PageTreeView $pageTree */
        $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
        $permsClause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
        $treeClause = '';
        if (! empty($permsClause)) {
            $treeClause = 'AND ' . $permsClause;
        }
        $pageTree->init($treeClause);
        $pageTree->getTree($rootid, $depth, '');
        foreach ($pageTree->tree as $treeNode) {
            $pageIds[] = $treeNode['row']['uid'];
        }

        $records = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($rootid, $pageIds);
        foreach ($records as $record) {
            $names[] = $record['name'];
        }

        return $names;
    }
652
653
    /**
     * Check if a user has access to an item
     * (e.g. get the group list of the current logged in user from $GLOBALS['TSFE']->gr_list)
     *
     * @param string $groupList Comma-separated list of (fe_)group UIDs from a user
     * @param string $accessList Comma-separated list of (fe_)group UIDs of the item to access
     * @return bool TRUE if at least one of the users group UIDs is in the access list or the access list is empty
     * @see \TYPO3\CMS\Frontend\Page\PageRepository::getMultipleGroupsWhereClause()
     * @deprecated
     * @codeCoverageIgnore
     */
    public function hasGroupAccess($groupList, $accessList)
    {
        // An empty access list grants access.
        if (empty($accessList)) {
            return true;
        }

        $userGroupUids = GeneralUtility::intExplode(',', $groupList);
        foreach ($userGroupUids as $userGroupUid) {
            if (! GeneralUtility::inList($accessList, $userGroupUid)) {
                continue;
            }
            return true;
        }

        return false;
    }
676
677
    /**
     * Expands the parameter configuration to the individual values of each parameter.
     *
     * Syntax of the values:
     * - If the value is wrapped in [...] it is expanded according to the following syntax, otherwise the value is taken literally
     * - The configuration inside the brackets is split by "|", the parts are processed individually and their results are added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, expanded to all values in between, boundaries included, from low to high
     *           (at most 1000 values per range). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[further options]" = Look up of table records, deleted records are filtered out.
     *           Example "_TABLE:tt_content; _PID:123". Supported options:
     *             _PID:[page id]          page to look up the records on, default is the current page
     *             _RECURSIVE:[depth]      additionally look up records on the sub pages down to the given depth
     *             _PIDFIELD:[field name]  field holding the page id, default is "pid"
     *             _FIELD:[field name]     field providing the values, default is "uid"; other fields must be defined in TCA
     *             _WHERE:[condition]      additional condition which is handed to the query as it is
     *             _ADDTABLE:[table]       additional "from" part which is handed to the query as it is
     *             _ENABLELANG:1           picks only original records without their language overlays
     *         - Default: Literal value
     *
     * @param array $paramArray Array with key (GET var name) and values (value of GET var which is configuration for expansion)
     * @param integer $pid Current page ID
     * @return array The parameter array with every value replaced by the list of its expanded values
     * @deprecated
     * @codeCoverageIgnore
     */
    public function expandParameters($paramArray, $pid)
    {
        // Traverse parameter names:
        foreach ($paramArray as $p => $v) {
            $v = trim($v);

            // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal
            if (strpos($v, '[') === 0 && substr($v, -1) === ']') {
                // So, find the value inside brackets and reset the paramArray value as an array.
                $v = substr($v, 1, -1);
                $paramArray[$p] = [];

                // Explode parts and traverse them:
                $parts = explode('|', $v);
                foreach ($parts as $pV) {
                    // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                    if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($pV), $reg)) {
                        $reg = $this->swapIfFirstIsLargerThanSecond($reg);

                        // Traverse range, add values:
                        // Limit to size of range!
                        $runAwayBrake = 1000;
                        for ($a = $reg[1]; $a <= $reg[2]; $a++) {
                            $paramArray[$p][] = $a;
                            $runAwayBrake--;
                            if ($runAwayBrake <= 0) {
                                break;
                            }
                        }
                    } elseif (strpos(trim($pV), '_TABLE:') === 0) {
                        // Parse parameters:
                        $subparts = GeneralUtility::trimExplode(';', $pV);
                        $subpartParams = [];
                        foreach ($subparts as $spV) {
                            // A sub part without ":" has no value; pad with NULL so that the destructuring never
                            // reads a missing offset and the isset() checks below still treat the option as absent.
                            [$pKey, $pVal] = array_pad(GeneralUtility::trimExplode(':', $spV), 2, null);
                            $subpartParams[$pKey] = $pVal;
                        }

                        $table = $subpartParams['_TABLE'] ?? '';

                        // Table exists:
                        if (isset($GLOBALS['TCA'][$table])) {
                            $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : (int) $pid;
                            $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
                            $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
                            $where = $subpartParams['_WHERE'] ?? '';
                            $addTable = $subpartParams['_ADDTABLE'] ?? '';

                            // "_FIELD" is optional: fall back to the uid if it is missing or empty.
                            $fieldName = ! empty($subpartParams['_FIELD']) ? $subpartParams['_FIELD'] : 'uid';
                            if ($fieldName === 'uid' || ! empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
                                $queryBuilder = $this->getQueryBuilder($table);

                                if ($recursiveDepth > 0) {
                                    /** @var QueryGenerator $queryGenerator */
                                    $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
                                    $pidList = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);
                                    $pidArray = GeneralUtility::intExplode(',', $pidList);
                                } else {
                                    $pidArray = [(string) $lookUpPid];
                                }

                                // Only the "deleted" restriction is applied to the look up.
                                $queryBuilder->getRestrictions()
                                    ->removeAll()
                                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

                                $queryBuilder
                                    ->select($fieldName)
                                    ->from($table)
                                    ->where(
                                        $queryBuilder->expr()->in($pidField, $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)),
                                        $where
                                    );

                                if (! empty($addTable)) {
                                    // TODO: Check if this works as intended!
                                    $queryBuilder->add('from', $addTable);
                                }

                                // Not every table is translatable, so the pointer field may be missing in TCA.
                                $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';

                                if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
                                    $queryBuilder->andWhere(
                                        $queryBuilder->expr()->lte(
                                            $transOrigPointerField,
                                            0
                                        )
                                    );
                                }

                                $statement = $queryBuilder->execute();

                                // Index the rows by the selected field so that every value is collected only once.
                                $rows = [];
                                while ($row = $statement->fetch()) {
                                    $rows[$row[$fieldName]] = $row;
                                }

                                $paramArray[$p] = array_merge($paramArray[$p], array_keys($rows));
                            }
                        }
                    } else {
                        // Just add value:
                        $paramArray[$p][] = $pV;
                    }

                    // Hook for processing own expandParameters place holder (the registration is optional)
                    if (is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null)) {
                        $_params = [
                            'pObj' => &$this,
                            'paramArray' => &$paramArray,
                            'currentKey' => $p,
                            'currentValue' => $pV,
                            'pid' => $pid,
                        ];
                        foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] as $_funcRef) {
                            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
                        }
                    }
                }

                // Make unique set of values; ksort() orders the outer parameter array by parameter name:
                $paramArray[$p] = array_unique($paramArray[$p]);
                ksort($paramArray);
            } else {
                // Set the literal value as only value in array:
                $paramArray[$p] = [$v];
            }
        }

        return $paramArray;
    }
825
826
    /**
     * Compiles the URLs for a parameter array (the output of expandParameters()).
     * The number of URLs will be the multiplication of the number of parameter values for each key.
     *
     * The work is delegated to the URL service, which additionally receives the value of
     * getMaximumUrlsToCompile() as third argument.
     *
     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
     * @param array $urls URLs accumulated in this array (for recursion)
     * @return array The result of the URL service
     * @deprecated
     * @codeCoverageIgnore
     */
    public function compileUrls(array $paramArray, array $urls): array
    {
        $maximumUrlsToCompile = $this->getMaximumUrlsToCompile();

        return $this->urlService->compileUrls($paramArray, $urls, $maximumUrlsToCompile);
    }
839
840
    /************************************
841
     *
842
     * Crawler log
843
     *
844
     ************************************/
845
846
    /**
     * Returns the records of the crawler queue for the given page ID, ordered by "scheduled" descending
     *
     * @param integer $id Page ID for which to look up log entries.
     * @param QueueFilter $queueFilter "pending" => only entries with exec_time = 0, "finished" => only entries with exec_time > 0, anything else => no restriction
     * @param boolean $doFlush If TRUE, the queue repository is asked to flush the queue for the given filter before the entries are selected
     * @param boolean $doFullFlush Not used by this method
     * @param integer $itemsPerPage Limit the amount of entries per page, default is 10. No limit is applied for values <= 0
     * @return array
     *
     * @deprecated
     */
    public function getLogEntriesForPageId($id, QueueFilter $queueFilter, $doFlush = false, $doFullFlush = false, $itemsPerPage = 10)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $queryBuilder
            ->select('*')
            ->from($this->tableName)
            ->where(
                $queryBuilder->expr()->eq('page_id', $queryBuilder->createNamedParameter($id, PDO::PARAM_INT))
            )
            ->orderBy('scheduled', 'DESC');

        // NOTE(review): the filter object is compared loosely against the strings below, which presumably
        // relies on QueueFilter being convertible to a string - confirm against the QueueFilter class.
        switch ($queueFilter) {
            case 'pending':
                $queryBuilder->andWhere($queryBuilder->expr()->eq('exec_time', 0));
                break;
            case 'finished':
                $queryBuilder->andWhere($queryBuilder->expr()->gt('exec_time', 0));
                break;
        }

        if ($doFlush) {
            $this->queueRepository->flushQueue($queueFilter);
        }
        if ($itemsPerPage > 0) {
            $queryBuilder
                ->setMaxResults((int) $itemsPerPage);
        }

        return $queryBuilder->execute()->fetchAll();
    }
894
895
    /**
     * Returns the records of the crawler queue for the given set ID, ordered by "scheduled" descending
     *
     * @param int $set_id Set ID for which to look up log entries.
     * @param string $filter Filter: "pending" => all that is not yet run, "finished" => all complete ones, anything else => all entries
     * @param bool $doFlush If TRUE, the entries are handed to flushQueue() instead of being selected and an empty array is returned
     * @param bool $doFullFlush If TRUE (together with $doFlush), flushQueue() is called with an empty condition instead of the set/filter condition
     * @param int $itemsPerPage Limit the amount of entries per page, default is 10. No limit is applied for values <= 0
     * @return array
     *
     * @deprecated
     */
    public function getLogEntriesForSetId(int $set_id, string $filter = '', bool $doFlush = false, bool $doFullFlush = false, int $itemsPerPage = 10)
    {
        $selectQuery = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $selectQuery
            ->select('*')
            ->from($this->tableName)
            ->where(
                $selectQuery->expr()->eq('set_id', $selectQuery->createNamedParameter($set_id, PDO::PARAM_INT))
            )
            ->orderBy('scheduled', 'DESC');

        // The flush condition is collected in a separate AND-expression, parallel to the select query.
        $flushExpressions = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getConnectionForTable($this->tableName)
            ->getExpressionBuilder();
        $flushConstraints = $flushExpressions->andX();

        if ($filter === 'pending') {
            $selectQuery->andWhere($selectQuery->expr()->eq('exec_time', 0));
            $flushConstraints->add($flushExpressions->eq('exec_time', 0));
        } elseif ($filter === 'finished') {
            $selectQuery->andWhere($selectQuery->expr()->gt('exec_time', 0));
            $flushConstraints->add($flushExpressions->gt('exec_time', 0));
        }

        if ($doFlush) {
            $flushWhere = $flushConstraints->add($flushExpressions->eq('set_id', (int) $set_id));
            $this->flushQueue($doFullFlush ? '' : $flushWhere);

            return [];
        }

        if ($itemsPerPage > 0) {
            $selectQuery->setMaxResults((int) $itemsPerPage);
        }

        return $selectQuery->execute()->fetchAll();
    }
947
948
    /**
     * Adds a call back entry to the crawler queue (typically called from hooks, see the indexed search class "class.crawler.php")
     *
     * @param integer $setId Set ID
     * @param array $params Parameters to pass to call back function; a non-array value is replaced by an empty array
     * @param string $callBack Call back object reference, eg. 'EXT:indexed_search/class.crawler.php:&tx_indexedsearch_crawler'
     * @param integer $page_id Page ID to attach it to
     * @param integer $schedule Time at which to activate; the current time is used if the value casts to 0
     */
    public function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0): void
    {
        $callBackParameters = is_array($params) ? $params : [];
        $callBackParameters['_CALLBACKOBJ'] = $callBack;

        $scheduledAt = (int) $schedule;
        if (! $scheduledAt) {
            $scheduledAt = $this->getCurrentTime();
        }

        $queueRecord = [
            'page_id' => (int) $page_id,
            'parameters' => json_encode($callBackParameters),
            'scheduled' => $scheduledAt,
            'exec_time' => 0,
            'set_id' => (int) $setId,
            'result_data' => '',
        ];

        $queueConnection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME);
        $queueConnection->insert(QueueRepository::TABLE_NAME, $queueRecord);
    }
977
978
    /************************************
979
     *
980
     * URL setting
981
     *
982
     ************************************/
983
984
    /**
     * Setting a URL for crawling:
     *
     * Depending on $this->registerQueueEntriesInternallyOnly the queue entry is either only collected in
     * $this->queueEntries or inserted into the queue table. Unless the duplication check is skipped, an entry is
     * only inserted if the queue repository does not report an equal entry; a signal is emitted in both cases.
     *
     * @param integer $id Page ID
     * @param string $url Complete URL
     * @param array $subCfg Sub configuration array (from TS config). The keys "userGroups", "procInstrFilter", "procInstrParams." and "key" are read and may be missing
     * @param integer $tstamp Scheduled-time
     * @param string $configurationHash (optional) configuration hash
     * @param bool $skipInnerDuplicationCheck (optional) skip inner duplication check
     * @return bool TRUE if a new entry was inserted into the queue table, FALSE otherwise (also when entries are only registered internally)
     */
    public function addUrl(
        $id,
        $url,
        array $subCfg,
        $tstamp,
        $configurationHash = '',
        $skipInnerDuplicationCheck = false
    ) {
        $urlAdded = false;
        $rows = [];

        // Creating parameters:
        $parameters = [
            'url' => $url,
        ];

        // fe user group simulation:
        $uGs = implode(',', array_unique(GeneralUtility::intExplode(',', $subCfg['userGroups'] ?? '', true)));
        if ($uGs) {
            $parameters['feUserGroupList'] = $uGs;
        }

        // Setting processing instructions
        $parameters['procInstructions'] = GeneralUtility::trimExplode(',', $subCfg['procInstrFilter'] ?? '');
        if (is_array($subCfg['procInstrParams.'] ?? null)) {
            $parameters['procInstrParams'] = $subCfg['procInstrParams.'];
        }

        // Compile value array:
        $parameters_serialized = json_encode($parameters);
        $fieldArray = [
            'page_id' => (int) $id,
            'parameters' => $parameters_serialized,
            'parameters_hash' => GeneralUtility::shortMD5($parameters_serialized),
            'configuration_hash' => $configurationHash,
            'scheduled' => $tstamp,
            'exec_time' => 0,
            'set_id' => (int) $this->setID,
            'result_data' => '',
            'configuration' => $subCfg['key'] ?? null,
        ];

        if ($this->registerQueueEntriesInternallyOnly) {
            //the entries will only be registered and not stored to the database
            $this->queueEntries[] = $fieldArray;
        } else {
            if (! $skipInnerDuplicationCheck) {
                // check if there is already an equal entry
                $rows = $this->queueRepository->getDuplicateQueueItemsIfExists(
                    (bool) ($this->extensionSettings['enableTimeslot'] ?? false),
                    $tstamp,
                    $this->getCurrentTime(),
                    $fieldArray['page_id'],
                    $fieldArray['parameters_hash']
                );
            }

            if (empty($rows)) {
                $connectionForCrawlerQueue = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME);
                $connectionForCrawlerQueue->insert(
                    QueueRepository::TABLE_NAME,
                    $fieldArray
                );
                $uid = $connectionForCrawlerQueue->lastInsertId(QueueRepository::TABLE_NAME, 'qid');
                $urlAdded = true;

                $signalPayload = ['uid' => $uid, 'fieldArray' => $fieldArray];
                SignalSlotUtility::emitSignal(
                    self::class,
                    SignalSlotUtility::SIGNAL_URL_ADDED_TO_QUEUE,
                    $signalPayload
                );
            } else {
                // An equal entry is already queued: do not insert again, only tell the listeners about it.
                $signalPayload = ['rows' => $rows, 'fieldArray' => $fieldArray];
                SignalSlotUtility::emitSignal(
                    self::class,
                    SignalSlotUtility::SIGNAL_DUPLICATE_URL_IN_QUEUE,
                    $signalPayload
                );
            }
        }

        return $urlAdded;
    }
1080
1081
    /**
     * Provides the current system time as reported by time()
     *
     * @return int
     */
    public function getCurrentTime()
    {
        $currentTime = time();

        return $currentTime;
    }
1090
1091
    /************************************
1092
     *
1093
     * URL reading
1094
     *
1095
     ************************************/
1096
1097
    /**
     * Read URL for single queue entry
     *
     * The queue entry is locked by setting its exec_time, handed to the queue executor and finally updated with
     * the JSON encoded result. Signals are emitted before the entry is processed and before the result is stored.
     *
     * @param integer $queueId
     * @param boolean $force If set, will process even if exec_time has been set!
     *
     * @return int|null NULL if no matching queue entry was found. Otherwise 0, with the bits of
     *                  self::CLI_STATUS_POLLABLE_PROCESSED set if a registered "pollSuccess" instruction reported success
     */
    public function readUrl($queueId, $force = false)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
        $ret = 0;
        $this->logger->debug('crawler-readurl start ' . microtime(true));

        $queryBuilder
            ->select('*')
            ->from(QueueRepository::TABLE_NAME)
            ->where(
                $queryBuilder->expr()->eq('qid', $queryBuilder->createNamedParameter($queueId, PDO::PARAM_INT))
            );
        if (! $force) {
            // Without force only entries are processed which are not executed yet but scheduled for a process.
            $queryBuilder
                ->andWhere('exec_time = 0')
                ->andWhere('process_scheduled > 0');
        }
        $queueRec = $queryBuilder->execute()->fetch();

        if (! is_array($queueRec)) {
            return null;
        }

        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_PREPROCESS,
            [$queueId, &$queueRec]
        );

        // Set exec_time to lock record:
        $field_array = ['exec_time' => $this->getCurrentTime()];

        $this->setProcessID($queueRec['process_id']);

        if (isset($this->processID)) {
            //if multiprocessing is used we need to store the id of the process which has handled this entry
            $field_array['process_id_completed'] = $this->processID;
        }

        GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME)
            ->update(
                QueueRepository::TABLE_NAME,
                $field_array,
                ['qid' => (int) $queueId]
            );

        $result = $this->queueExecutor->executeQueueItem($queueRec, $this);

        // A result without content cannot report any success, so the status is only evaluated if content exists.
        if (isset($result['content'])) {
            /** @var JsonCompatibilityConverter $jsonCompatibilityConverter */
            $jsonCompatibilityConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
            $resultData = $jsonCompatibilityConverter->convert($result['content']);

            //atm there's no need to point to specific pollable extensions
            $pollables = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pollSuccess'] ?? null;
            if (is_array($resultData) && is_array($pollables)) {
                $procInstructions = $resultData['parameters']['procInstructions'] ?? null;
                foreach ($pollables as $pollable) {
                    // only check the success value if the instruction is running
                    // it is important to name the pollSuccess key same as the procInstructions key
                    if (is_array($procInstructions)
                        && in_array($pollable, $procInstructions, true)
                        && ! empty($resultData['success'][$pollable])
                    ) {
                        $ret |= self::CLI_STATUS_POLLABLE_PROCESSED;
                    }
                }
            }
        }

        // Set result in log which also denotes the end of the processing of this entry.
        $field_array = ['result_data' => json_encode($result)];

        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            [$queueId, &$field_array]
        );

        GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME)
            ->update(
                QueueRepository::TABLE_NAME,
                $field_array,
                ['qid' => (int) $queueId]
            );

        $this->logger->debug('crawler-readurl stop ' . microtime(true));
        return $ret;
    }
1196
1197
    /**
     * Inserts a queue entry that does not exist in the database yet, executes it
     * and writes the execution result back to the freshly inserted record.
     *
     * @param array $field_array Queue field array; exec_time and qid are set by this method
     *
     * @return array|bool|mixed|string Whatever the queue executor returned for the entry
     */
    public function readUrlFromArray($field_array)
    {
        // Stamp exec_time first: a record with an exec_time counts as locked.
        $field_array['exec_time'] = $this->getCurrentTime();

        $queueTable = QueueRepository::TABLE_NAME;
        $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable($queueTable);
        $connection->insert($queueTable, $field_array);

        // The executor receives the record including its newly assigned qid.
        $queueId = $connection->lastInsertId($queueTable, 'qid');
        $field_array['qid'] = $queueId;
        $result = $this->queueExecutor->executeQueueItem($field_array, $this);

        // Storing the result in the log also marks the end of processing for this entry.
        // The array is handed over by reference so that slots can adjust what gets written.
        $logFields = ['result_data' => json_encode($result)];
        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            [$queueId, &$logFields]
        );

        $connection->update($queueTable, $logFields, ['qid' => $queueId]);

        return $result;
    }
1233
1234
    /*****************************
1235
     *
1236
     * Compiling URLs to crawl - tools
1237
     *
1238
     *****************************/
1239
1240
    /**
     * Compiles the crawl URL overview for a root page and, depending on $depth, the pages beneath it.
     *
     * The scheduling and selection arguments are stored on the controller, where
     * drawURLs_addRowsForPage() reads them while rendering the rows of each page.
     * For pages of type "mount point" the rows of the mounted tree are added as well.
     *
     * @param int $id Root page id to start from.
     * @param int $depth Depth of tree, 0=only id-page, 1= on sublevel, 99 = infinite
     * @param int $scheduledTime Unix Time when the URL is timed to be visited when put in queue
     * @param int $reqMinute Number of requests per minute (creates the interleave between requests)
     * @param bool $submitCrawlUrls If set, submits the URLs to queue in database (real crawling)
     * @param bool $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
     * @param array $incomingProcInstructions Array of processing instructions
     * @param array $configurationSelection Array of configuration keys
     * @return string Concatenated output of drawURLs_addRowsForPage() for all visited pages
     */
    public function getPageTreeAndUrls(
        $id,
        $depth,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array $incomingProcInstructions,
        array $configurationSelection
    ) {
        $this->scheduledTime = $scheduledTime;
        $this->reqMinute = $reqMinute;
        $this->submitCrawlUrls = $submitCrawlUrls;
        $this->downloadCrawlUrls = $downloadCrawlUrls;
        $this->incomingProcInstructions = $incomingProcInstructions;
        $this->incomingConfigurationSelection = $configurationSelection;

        // Start every run with empty tracking state.
        $this->duplicateTrack = [];
        $this->downloadUrls = [];

        // Drawing tree:
        /** @var PageTreeView $tree */
        $tree = GeneralUtility::makeInstance(PageTreeView::class);
        $perms_clause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
        $tree->init('AND ' . $perms_clause);

        $pageInfo = BackendUtility::readPageAccess($id, $perms_clause);
        if (is_array($pageInfo)) {
            // Set root row:
            $tree->tree[] = [
                'row' => $pageInfo,
                'HTML' => $this->iconFactory->getIconForRecord('pages', $pageInfo, Icon::SIZE_SMALL),
            ];
        }

        // Get branch beneath:
        if ($depth) {
            $tree->getTree($id, $depth, '');
        }

        // Traverse page tree:
        $code = '';

        foreach ($tree->tree as $data) {
            $this->MP = false;

            // Recognize mount points. Database drivers may deliver integer columns as numeric
            // strings, so the doktype is cast before the strict comparison with the int constant.
            if ((int) $data['row']['doktype'] === PageRepository::DOKTYPE_MOUNTPOINT) {
                // NOTE(review): the result is indexed with [0] below - confirm that getPage()
                // of the injected page repository returns a list of rows rather than one row.
                $mountpage = $this->pageRepository->getPage($data['row']['uid']);

                // fetch mounted pages
                $this->MP = $mountpage[0]['mount_pid'] . '-' . $data['row']['uid'];

                $mountTree = GeneralUtility::makeInstance(PageTreeView::class);
                $mountTree->init('AND ' . $perms_clause);
                $mountTree->getTree($mountpage[0]['mount_pid'], $depth);

                foreach ($mountTree->tree as $mountData) {
                    $code .= $this->drawURLs_addRowsForPage(
                        $mountData['row'],
                        $mountData['HTML'] . BackendUtility::getRecordTitle('pages', $mountData['row'], true)
                    );
                }

                // replace page when mount_pid_ol is enabled
                if ($mountpage[0]['mount_pid_ol']) {
                    $data['row']['uid'] = $mountpage[0]['mount_pid'];
                } else {
                    // if the mount_pid_ol is not set the MP must not be used for the mountpoint page
                    $this->MP = false;
                }
            }

            $code .= $this->drawURLs_addRowsForPage(
                $data['row'],
                $data['HTML'] . BackendUtility::getRecordTitle('pages', $data['row'], true)
            );
        }

        return $code;
    }
1332
1333
    /**
     * Expands an exclude string by delegating to the configuration service.
     *
     * @param string $excludeString Exclude string
     * @return array Result of ConfigurationService::expandExcludeString()
     * @deprecated
     */
    public function expandExcludeString($excludeString)
    {
        $expanded = $this->configurationService->expandExcludeString($excludeString);

        return $expanded;
    }
1344
1345
    /**
     * Renders the table rows describing the crawler configurations of a single page.
     *
     * Each configuration left after ConfigurationService::removeDisallowedConfigurations()
     * yields one row; the page title cell is emitted with the first row only and spans all
     * of them. A page without any configuration gets a single "No entries" row.
     *
     * @param array $pageRow Page record; its uid is checked against each configuration's exclude list
     * @param string $pageTitle Markup of the title cell, inserted as given
     * @return string The rendered table rows
     */
    public function drawURLs_addRowsForPage(array $pageRow, string $pageTitle): string
    {
        $skipMessage = '';

        // Fetch the configurations of the page, then keep only the selected ones.
        $configurations = $this->getUrlsForPageRow($pageRow, $skipMessage);
        $configurations = ConfigurationService::removeDisallowedConfigurations($this->incomingConfigurationSelection, $configurations);

        $content = '';
        if (empty($configurations)) {
            $message = empty($skipMessage) ? '' : ' (' . $skipMessage . ')';

            // Single placeholder row for a page without configurations.
            $content .= '
                <tr>
                    <td>' . $pageTitle . '</td>
                    <td colspan="6"><em>No entries</em>' . $message . '</td>
                </tr>';
        } else {
            $rowIndex = 0;
            foreach ($configurations as $confKey => $confArray) {
                // Only the first row carries the title cell, spanning all configuration rows.
                $titleClm = $rowIndex === 0
                    ? '<td rowspan="' . count($configurations) . '">' . $pageTitle . '</td>'
                    : '';

                $excludedPageIds = $this->configurationService->expandExcludeString($confArray['subCfg']['exclude']);
                if (in_array($pageRow['uid'], $excludedPageIds, true)) {
                    $content .= '<tr>
                            ' . $titleClm . '
                            <td>' . htmlspecialchars($confKey) . '</td>
                            <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td>
                        </tr>';
                } else {
                    // URL list:
                    $urlList = $this->urlListFromUrlArray(
                        $confArray,
                        $pageRow,
                        $this->scheduledTime,
                        $this->reqMinute,
                        $this->submitCrawlUrls,
                        $this->downloadCrawlUrls,
                        $this->duplicateTrack,
                        $this->downloadUrls,
                        // if empty the urls won't be filtered by processing instructions
                        $this->incomingProcInstructions
                    );

                    // Expanded parameters, together with the number of resulting combinations.
                    $paramExpanded = '';
                    $valueCounts = [];
                    $combinations = 1;
                    foreach ($confArray['paramExpanded'] as $gVar => $gVal) {
                        $valueCount = count($gVal);
                        $paramExpanded .= '
                            <tr>
                                <td>' . htmlspecialchars('&' . $gVar . '=') . '<br/>' .
                            '(' . $valueCount . ')' .
                            '</td>
                                <td nowrap="nowrap">' . nl2br(htmlspecialchars(implode(chr(10), $gVal))) . '</td>
                            </tr>
                        ';
                        $combinations *= $valueCount;
                        $valueCounts[] = $valueCount;
                    }
                    $paramExpanded = '<table>' . $paramExpanded . '</table>';
                    $paramExpanded .= 'Comb: ' . implode('*', $valueCounts) . '=' . $combinations;

                    // Options: each one is listed only when it has a non-empty value.
                    $optionLabels = [
                        'userGroups' => 'User Groups: ',
                        'procInstrFilter' => 'ProcInstr: ',
                    ];
                    $optionValues = '';
                    foreach ($optionLabels as $optionKey => $optionLabel) {
                        if ($confArray['subCfg'][$optionKey]) {
                            $optionValues .= $optionLabel . $confArray['subCfg'][$optionKey] . '<br/>';
                        }
                    }

                    // Parsed parameters, one "&name=value" pair per line.
                    $parsedParameters = GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed']);
                    $parsedParameters = rawurldecode(trim(str_replace('&', chr(10) . '&', $parsedParameters)));

                    // Compile row:
                    $content .= '
                        <tr>
                            ' . $titleClm . '
                            <td>' . htmlspecialchars($confKey) . '</td>
                            <td>' . nl2br(htmlspecialchars($parsedParameters)) . '</td>
                            <td>' . $paramExpanded . '</td>
                            <td nowrap="nowrap">' . $urlList . '</td>
                            <td nowrap="nowrap">' . $optionValues . '</td>
                            <td nowrap="nowrap">' . DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']) . '</td>
                        </tr>';
                }

                $rowIndex++;
            }
        }

        return $content;
    }
1448
1449
    /*****************************
1450
     *
1451
     * CLI functions
1452
     *
1453
     *****************************/
1454
1455
    /**
     * Runs one CLI crawling pass: initialises the registered hooks, cleans the queue,
     * assigns up to $countInARun queue entries to this process and reads their URLs.
     *
     * @param int $countInARun Maximum number of queue entries fetched for this run
     * @param int $sleepTime Value handed to usleep() after every processed entry
     * @param int $sleepAfterFinish Value handed to sleep() once the entries have been worked through
     * @return int Bit mask built from the readUrl() results and the CLI_STATUS_* constants
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_run(int $countInARun, int $sleepTime, int $sleepAfterFinish): int
    {
        $status = 0;

        // Hooks come first; each registration raises a deprecation notice.
        $hookClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];
        foreach ($hookClasses as $hookClass) {
            trigger_error(
                'This hook (crawler/cli_hooks) is deprecated since 9.1.5 and will be removed when dropping support for TYPO3 9LTS and 10LTS',
                E_USER_DEPRECATED
            );
            $hook = GeneralUtility::makeInstance($hookClass);
            if (is_object($hook)) {
                $hook->crawler_init($this);
            }
        }

        $this->queueRepository->cleanupQueue();

        $queueRows = $this->queueRepository->fetchRecordsToBeCrawled($countInARun);
        if (empty($queueRows)) {
            // Nothing was fetched, so no status bit gets set.
            return $status;
        }

        $queueIds = [];
        foreach ($queueRows as $queueRow) {
            $queueIds[] = $queueRow['qid'];
        }

        $processId = $this->CLI_buildProcessId();

        // Remember how many entries were assigned to determine later how many were processed.
        $assignedCount = $this->queueRepository->updateProcessIdAndSchedulerForQueueIds($queueIds, $processId);
        $this->processRepository->updateProcessAssignItemsCount($assignedCount, $processId);

        // Abort when not every fetched entry could be assigned to this process.
        if ($assignedCount !== count($queueIds)) {
            return $status | self::CLI_STATUS_ABORTED;
        }

        $processedCount = 0;
        foreach ($queueRows as $queueRow) {
            $status |= $this->readUrl($queueRow['qid']);
            $processedCount++;

            // Just to relax the system
            usleep($sleepTime);

            // The crawler may have been disabled while this entry was read: leave right away
            // and do NOT mark the process as ended.
            if ($this->crawler->isDisabled()) {
                return $status | self::CLI_STATUS_ABORTED;
            }

            if (! $this->processRepository->isProcessActive($this->CLI_buildProcessId())) {
                // The process is no longer registered as active - possibly a timeout.
                $this->CLI_debug('conflict / timeout (' . $this->CLI_buildProcessId() . ')');
                $status |= self::CLI_STATUS_ABORTED;
                break;
            }
        }

        sleep($sleepAfterFinish);

        if ($processedCount > 0) {
            $status |= self::CLI_STATUS_PROCESSED;
        }

        return $status;
    }
1530
1531
    /**
     * Instantiates every class registered under the crawler "cli_hooks" configuration
     * and calls crawler_init() on it, passing this controller.
     *
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_runHooks(): void
    {
        $hookClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];

        foreach ($hookClasses as $hookClass) {
            $hook = GeneralUtility::makeInstance($hookClass);
            if (! is_object($hook)) {
                continue;
            }

            $hook->crawler_init($this);
        }
    }
1545
1546
    /**
     * Tries to register a new process under the given id.
     *
     * Active processes whose TTL lies before the current time are treated as orphans:
     * they do not count against the process limit and are cleaned up afterwards,
     * regardless of whether the new process could be registered.
     *
     * @param string $id identification string for the process
     * @return bool true when the process was added, false when the system process id
     *              is unavailable or the configured process limit is already reached
     * @todo preemption might not be the most elegant way to clean up
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_checkAndAcquireNewProcess($id)
    {
        $systemProcessId = getmypid();
        if (! $systemProcessId) {
            return false;
        }

        $livingProcessCount = 0;
        $orphanProcessIds = [];

        $activeProcesses = $this->processRepository->findAllActive();
        $now = $this->getCurrentTime();

        /** @var Process $activeProcess */
        foreach ($activeProcesses as $activeProcess) {
            if ($activeProcess->getTtl() < $now) {
                $orphanProcessIds[] = $activeProcess->getProcessId();
                continue;
            }

            $livingProcessCount++;
        }

        // A new process is only added while the limit of active processes is not reached.
        $acquired = $livingProcessCount < (int) $this->extensionSettings['processLimit'];
        if ($acquired) {
            $this->processRepository->addProcess($id, $systemProcessId);
        }

        // Clean-up runs in any case.
        $this->processRepository->deleteProcessesMarkedAsDeleted();
        $this->processRepository->markRequestedProcessesAsNotActive($orphanProcessIds);
        $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($orphanProcessIds);

        return $acquired;
    }
1592
1593
    /**
     * Releases the given processes together with the queue entries assigned to them.
     *
     * Before the requested ids are handled, queue entries that point to inactive processes
     * are detached, and inactive processes that still own unexecuted queue entries get
     * their system process id reset.
     *
     * @param mixed $releaseIds string with a single process-id or array with multiple process-ids
     * @return bool false when an empty array was given (nothing to release), true otherwise
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_releaseProcesses($releaseIds)
    {
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
        $queryBuilder = $connectionPool->getQueryBuilderForTable($this->tableName);

        // A single id is handled like a list with one element.
        $processIds = is_array($releaseIds) ? $releaseIds : [$releaseIds];
        if ($processIds === []) {
            // nothing to release
            return false;
        }

        // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup
        // this ensures that a single process can't mess up the entire process table

        // mark all processes as deleted which have no "waiting" queue-entries and which are not active

        // Release queue entries that belong to inactive processes.
        $queryBuilder
            ->update(QueueRepository::TABLE_NAME, 'q')
            ->where(
                'q.process_id IN(SELECT p.process_id FROM tx_crawler_process as p WHERE p.active = 0)'
            )
            ->set('q.process_scheduled', 0)
            ->set('q.process_id', '')
            ->execute();

        // The same builder is reused below, so the SET part of the first statement is dropped.
        // FIXME: Not entirely sure that this is equivalent to the previous version
        $queryBuilder->resetQueryPart('set');

        // Release process entries that are inactive but still own unexecuted queue entries.
        $queryBuilder
            ->update(ProcessRepository::TABLE_NAME)
            ->where(
                $queryBuilder->expr()->eq('active', 0),
                'process_id IN(SELECT q.process_id FROM tx_crawler_queue as q WHERE q.exec_time = 0)'
            )
            ->set('system_process_id', 0)
            ->execute();

        $this->processRepository->markRequestedProcessesAsNotActive($processIds);
        $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($processIds);

        return true;
    }
1647
1648
    /**
     * Returns the id of the current process, generating it on first use.
     *
     * The id is derived from the current microtime via GeneralUtility::shortMD5()
     * and kept in $this->processID, so later calls return the same value.
     *
     * @return string the ID
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_buildProcessId()
    {
        if ($this->processID) {
            return $this->processID;
        }

        $this->processID = GeneralUtility::shortMD5(microtime(true));

        return $this->processID;
    }
1662
1663
    /**
     * Writes a message followed by a line break to the output and flushes it,
     * but only when the "processDebug" extension setting casts to a non-zero integer.
     *
     * @param string $msg the message
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_debug($msg): void
    {
        $debugEnabled = (int) $this->extensionSettings['processDebug'] !== 0;
        if (! $debugEnabled) {
            return;
        }

        echo $msg . "\n";
        flush();
    }
1677
1678
    /**
     * Cleans up entries that stayed for too long in the queue. These are:
     * - processed entries (exec_time set) older than the "cleanUpProcessedAge" setting
     * - entries scheduled longer ago than the "cleanUpScheduledAge" setting
     * Both settings are multiplied by the number of seconds per day.
     *
     * @deprecated
     */
    public function cleanUpOldQueueEntries(): void
    {
        // 24*60*60 seconds in 24 hours
        $secondsPerDay = 86400;
        $processedAgeInSeconds = $this->extensionSettings['cleanUpProcessedAge'] * $secondsPerDay;
        $scheduledAgeInSeconds = $this->extensionSettings['cleanUpScheduledAge'] * $secondsPerDay;

        $now = time();
        $processedBefore = $now - $processedAgeInSeconds;
        $scheduledBefore = $now - $scheduledAgeInSeconds;

        $this->flushQueue(
            '(exec_time<>0 AND exec_time<' . $processedBefore . ') OR scheduled<=' . $scheduledBefore
        );
    }
1695
1696
    /**
1697
     * Removes queue entries
1698
     *
1699
     * @param string $where SQL related filter for the entries which should be removed
1700
     *
1701
     * @deprecated
1702
     */
1703
    protected function flushQueue($where = ''): void
1704
    {
1705
        $realWhere = strlen((string) $where) > 0 ? $where : '1=1';
1706
1707
        $queryBuilder = $this->getQueryBuilder($this->tableName);
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be remove in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1707
        $queryBuilder = $this->getQueryBuilder(/** @scrutinizer ignore-deprecated */ $this->tableName);

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1708
1709
        $groups = $queryBuilder
0 ignored issues
show
Deprecated Code introduced by
The function Doctrine\DBAL\ForwardCom...lity\Result::fetchAll() has been deprecated: Use fetchAllNumeric(), fetchAllAssociative() or fetchFirstColumn() instead. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1709
        $groups = /** @scrutinizer ignore-deprecated */ $queryBuilder

This function has been deprecated. The supplier of the function has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the function will be removed and what other function to use instead.

Loading history...
1710
            ->selectLiteral('DISTINCT set_id')
1711
            ->from($this->tableName)
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be remove in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1711
            ->from(/** @scrutinizer ignore-deprecated */ $this->tableName)

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1712
            ->where($realWhere)
1713
            ->execute()
1714
            ->fetchAll();
1715
        if (is_array($groups)) {
0 ignored issues
show
introduced by
The condition is_array($groups) is always true.
Loading history...
1716
            foreach ($groups as $group) {
1717
                $subSet = $queryBuilder
0 ignored issues
show
Deprecated Code introduced by
The function Doctrine\DBAL\ForwardCom...lity\Result::fetchAll() has been deprecated: Use fetchAllNumeric(), fetchAllAssociative() or fetchFirstColumn() instead. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1717
                $subSet = /** @scrutinizer ignore-deprecated */ $queryBuilder

This function has been deprecated. The supplier of the function has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the function will be removed and what other function to use instead.

Loading history...
1718
                    ->select('qid', 'set_id')
1719
                    ->from($this->tableName)
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be removed in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1719
                    ->from(/** @scrutinizer ignore-deprecated */ $this->tableName)

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1720
                    ->where(
1721
                        $realWhere,
1722
                        $queryBuilder->expr()->eq('set_id', $group['set_id'])
1723
                    )
1724
                    ->execute()
1725
                    ->fetchAll();
1726
1727
                $payLoad = ['subSet' => $subSet];
1728
                SignalSlotUtility::emitSignal(
0 ignored issues
show
Deprecated Code introduced by
The function AOE\Crawler\Utility\Sign...otUtility::emitSignal() has been deprecated. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1728
                /** @scrutinizer ignore-deprecated */ SignalSlotUtility::emitSignal(
Loading history...
1729
                    self::class,
1730
                    SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH,
1731
                    $payLoad
1732
                );
1733
            }
1734
        }
1735
1736
        $queryBuilder
1737
            ->delete($this->tableName)
0 ignored issues
show
Deprecated Code introduced by
The property AOE\Crawler\Controller\C...rController::$tableName has been deprecated: Since v9.2.5 - This will be removed in v10 ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-deprecated  annotation

1737
            ->delete(/** @scrutinizer ignore-deprecated */ $this->tableName)

This property has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the property will be removed from the class and what other property to use instead.

Loading history...
1738
            ->where($realWhere)
1739
            ->execute();
1740
    }
1741
1742
    /**
     * Collects the qids of pending queue entries that duplicate the given entry.
     *
     * A row counts as a duplicate when it has the same page_id and parameters_hash,
     * has neither exec_time nor process_id set, and satisfies the schedule rule:
     * - $tstamp is not later than the current time: every row scheduled up to the
     *   current time matches; with the "enableTimeslot" extension setting active,
     *   rows scheduled within +/- 100 of the current time value match as well.
     * - $tstamp is later than the current time: only rows scheduled for exactly
     *   $tstamp match.
     *
     * @param int $tstamp
     * @param array $fieldArray must provide the keys "page_id" and "parameters_hash"
     *
     * @return array list of the qid values of all duplicate rows (empty if none)
     * @deprecated
     */
    protected function getDuplicateRowsIfExist($tstamp, $fieldArray)
    {
        $rows = [];

        $currentTime = $this->getCurrentTime();

        // Resolve the query builder through the repository constant instead of the
        // deprecated $this->tableName property, matching the from() clause below.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
        $queryBuilder
            ->select('qid')
            ->from(QueueRepository::TABLE_NAME);

        if ($tstamp <= $currentTime) {
            // The entry is scheduled with "now" (or a time in the past).
            if ($this->extensionSettings['enableTimeslot']) {
                $timeBegin = $currentTime - 100;
                $timeEnd = $currentTime + 100;
                $queryBuilder
                    ->where(
                        'scheduled BETWEEN ' . $timeBegin . ' AND ' . $timeEnd
                    )
                    ->orWhere(
                        $queryBuilder->expr()->lte('scheduled', $currentTime)
                    );
            } else {
                $queryBuilder
                    ->where(
                        $queryBuilder->expr()->lte('scheduled', $currentTime)
                    );
            }
        } else {
            // Entries with a timestamp in the future need to have the same schedule time.
            // $tstamp is supplied by the caller and untyped, so bind it as a parameter
            // like the other caller-supplied values below.
            $queryBuilder
                ->where(
                    $queryBuilder->expr()->eq(
                        'scheduled',
                        $queryBuilder->createNamedParameter($tstamp, PDO::PARAM_INT)
                    )
                );
        }

        // Restrict to rows that are still pending and target the same page and parameters.
        $queryBuilder
            ->andWhere('NOT exec_time')
            ->andWhere('NOT process_id')
            ->andWhere($queryBuilder->expr()->eq('page_id', $queryBuilder->createNamedParameter($fieldArray['page_id'], PDO::PARAM_INT)))
            ->andWhere($queryBuilder->expr()->eq('parameters_hash', $queryBuilder->createNamedParameter($fieldArray['parameters_hash'], PDO::PARAM_STR)));

        $statement = $queryBuilder->execute();

        while ($row = $statement->fetch()) {
            $rows[] = $row['qid'];
        }

        return $rows;
    }
1803
1804
    /**
     * Computes the md5 hash of the serialized configuration array.
     *
     * The "paramExpanded" and "URLs" entries are dropped before serializing,
     * so they do not influence the resulting hash.
     *
     * @return string
     */
    protected function getConfigurationHash(array $configuration)
    {
        unset($configuration['paramExpanded'], $configuration['URLs']);

        $serializedConfiguration = serialize($configuration);

        return md5($serializedConfiguration);
    }
1815
1816
    /**
     * Builds the URL for a page and query string by delegating to a fresh UrlService instance.
     *
     * All arguments are passed through unchanged to UrlService::getUrlFromPageAndQueryParameters().
     *
     * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl
     * @throws SiteNotFoundException
     * @throws InvalidRouteArgumentsException
     *
     * @deprecated Using CrawlerController::getUrlFromPageAndQueryParameters() is deprecated since 9.1.1 and will be removed in v11.x, please use UrlService->getUrlFromPageAndQueryParameters() instead.
     * @codeCoverageIgnore
     */
    protected function getUrlFromPageAndQueryParameters(int $pageId, string $queryString, ?string $alternativeBaseUrl, int $httpsOrHttp): UriInterface
    {
        return (new UrlService())->getUrlFromPageAndQueryParameters(
            $pageId,
            $queryString,
            $alternativeBaseUrl,
            $httpsOrHttp
        );
    }
1832
1833
    /**
     * Returns $reg with the values at index 1 and 2 exchanged when the value
     * at index 1 is greater than the value at index 2; otherwise $reg is
     * returned unchanged.
     *
     * @deprecated
     */
    protected function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        // Already ordered (or equal): nothing to swap.
        if (! ($reg[1] > $reg[2])) {
            return $reg;
        }

        [$reg[1], $reg[2]] = [$reg[2], $reg[1]];

        return $reg;
    }
1847
1848
    /**
     * Creates a new PageService instance on every call.
     */
    protected function getPageService(): PageService
    {
        $pageService = new PageService();

        return $pageService;
    }
1852
1853
    /**
     * Returns the current value of the maximumUrlsToCompile property.
     */
    private function getMaximumUrlsToCompile(): int
    {
        $maximumUrls = $this->maximumUrlsToCompile;

        return $maximumUrls;
    }
1857
1858
    /**
     * Returns the backend user, caching $GLOBALS['BE_USER'] in $this->backendUser
     * the first time no user has been stored yet.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Backend authentication is initialized on every call so that the _cli_ user is loaded.
        Bootstrap::initializeBackendAuthentication();

        if ($this->backendUser !== null) {
            return $this->backendUser;
        }

        $this->backendUser = $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
1870
1871
    /**
     * Obtains a query builder for the given table from the ConnectionPool.
     *
     * @return QueryBuilder
     */
    private function getQueryBuilder(string $table)
    {
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
1880
}
1881