Passed
Push — main ( d42fc3...d75d73 )
by Tomas Norre
30:18 queued 25:31
created

CrawlerController::setExtensionSettings()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 1
c 0
b 0
f 0
nc 1
nop 1
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\Controller;
6
7
/***************************************************************
8
 *  Copyright notice
9
 *
10
 *  (c) 2020 AOE GmbH <[email protected]>
11
 *
12
 *  All rights reserved
13
 *
14
 *  This script is part of the TYPO3 project. The TYPO3 project is
15
 *  free software; you can redistribute it and/or modify
16
 *  it under the terms of the GNU General Public License as published by
17
 *  the Free Software Foundation; either version 3 of the License, or
18
 *  (at your option) any later version.
19
 *
20
 *  The GNU General Public License can be found at
21
 *  http://www.gnu.org/copyleft/gpl.html.
22
 *
23
 *  This script is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26
 *  GNU General Public License for more details.
27
 *
28
 *  This copyright notice MUST APPEAR in all copies of the script!
29
 ***************************************************************/
30
31
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
32
use AOE\Crawler\Converter\JsonCompatibilityConverter;
33
use AOE\Crawler\Crawler;
34
use AOE\Crawler\CrawlStrategy\CrawlStrategyFactory;
35
use AOE\Crawler\Domain\Model\Process;
36
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
37
use AOE\Crawler\Domain\Repository\ProcessRepository;
38
use AOE\Crawler\Domain\Repository\QueueRepository;
39
use AOE\Crawler\QueueExecutor;
40
use AOE\Crawler\Service\ConfigurationService;
41
use AOE\Crawler\Service\PageService;
42
use AOE\Crawler\Service\UrlService;
43
use AOE\Crawler\Service\UserService;
44
use AOE\Crawler\Utility\SignalSlotUtility;
45
use AOE\Crawler\Value\QueueFilter;
46
use PDO;
47
use Psr\Http\Message\UriInterface;
48
use Psr\Log\LoggerAwareInterface;
49
use Psr\Log\LoggerAwareTrait;
50
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
51
use TYPO3\CMS\Backend\Utility\BackendUtility;
52
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
53
use TYPO3\CMS\Core\Compatibility\PublicMethodDeprecationTrait;
54
use TYPO3\CMS\Core\Compatibility\PublicPropertyDeprecationTrait;
55
use TYPO3\CMS\Core\Core\Bootstrap;
56
use TYPO3\CMS\Core\Core\Environment;
57
use TYPO3\CMS\Core\Database\Connection;
58
use TYPO3\CMS\Core\Database\ConnectionPool;
59
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
60
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
61
use TYPO3\CMS\Core\Database\QueryGenerator;
62
use TYPO3\CMS\Core\Domain\Repository\PageRepository;
63
use TYPO3\CMS\Core\Exception\SiteNotFoundException;
64
use TYPO3\CMS\Core\Imaging\Icon;
65
use TYPO3\CMS\Core\Imaging\IconFactory;
66
use TYPO3\CMS\Core\Routing\InvalidRouteArgumentsException;
67
use TYPO3\CMS\Core\Site\Entity\Site;
68
use TYPO3\CMS\Core\Type\Bitmask\Permission;
69
use TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser;
70
use TYPO3\CMS\Core\Utility\DebugUtility;
71
use TYPO3\CMS\Core\Utility\GeneralUtility;
72
use TYPO3\CMS\Core\Utility\MathUtility;
73
use TYPO3\CMS\Extbase\Object\ObjectManager;
74
75
/**
76
 * Class CrawlerController
77
 *
78
 * @package AOE\Crawler\Controller
79
 */
80
class CrawlerController implements LoggerAwareInterface
81
{
82
    use LoggerAwareTrait;
83
    use PublicMethodDeprecationTrait;
84
    use PublicPropertyDeprecationTrait;
85
86
    /**
     * CLI status flag: nothing was processed.
     * (The misspelling in the constant name is part of the public API and is kept.)
     *
     * @deprecated since 9.2.5 will be removed in v11.x
     */
    public const CLI_STATUS_NOTHING_PROCCESSED = 0;

    /**
     * CLI status flag: queue not empty
     *
     * @deprecated since 9.2.5 will be removed in v11.x
     */
    public const CLI_STATUS_REMAIN = 1;

    /**
     * CLI status flag: (some) queue items were processed
     *
     * @deprecated since 9.2.5 will be removed in v11.x
     */
    public const CLI_STATUS_PROCESSED = 2;

    /**
     * CLI status flag: instance didn't finish
     *
     * @deprecated since 9.2.5 will be removed in v11.x
     */
    public const CLI_STATUS_ABORTED = 4;

    /**
     * @deprecated since 9.2.5 will be removed in v11.x
     */
    public const CLI_STATUS_POLLABLE_PROCESSED = 8;

    /**
     * @var int
     */
    public $setID = 0;

    /**
     * @var string
     */
    public $processID = '';

    /**
     * @var array
     */
    public $duplicateTrack = [];

    /**
     * @var array
     */
    public $downloadUrls = [];

    /**
     * @var array
     */
    public $incomingProcInstructions = [];

    /**
     * @var array
     */
    public $incomingConfigurationSelection = [];

    /**
     * @var bool
     */
    public $registerQueueEntriesInternallyOnly = false;

    /**
     * @var array
     */
    public $queueEntries = [];

    /**
     * Plain-text "[date] url" entries appended by urlListFromUrlArray().
     *
     * @var array
     */
    public $urlList = [];

    /**
     * Extension configuration; populated in the constructor and replaceable via setExtensionSettings().
     *
     * @var array
     */
    public $extensionSettings = [];

    /**
     * Mount Point
     *
     * Defaults to false; when truthy it is split on '-' and concatenated into URLs,
     * i.e. it is treated as a string by the code in this class.
     *
     * @var bool|string
     * TODO: Check what this is used for and settle on a single type.
     */
    public $MP = false;

    /**
     * @var string
     * @deprecated
     */
    protected $processFilename;

    /**
     * Holds the internal access mode; can be 'gui', 'cli' or 'cli_im'
     *
     * @var string
     * @deprecated
     */
    protected $accessMode;

    /**
     * @var QueueRepository
     */
    protected $queueRepository;

    /**
     * @var ProcessRepository
     */
    protected $processRepository;

    /**
     * @var ConfigurationRepository
     */
    protected $configurationRepository;

    /**
     * @var string
     * @deprecated Since v9.2.5 - This will be removed in v10
     */
    protected $tableName = 'tx_crawler_queue';

    /**
     * @var QueueExecutor
     */
    protected $queueExecutor;

    /**
     * Upper bound for compiled URLs; overwritten from the 'maxCompileUrls' setting in the constructor.
     *
     * @var int
     */
    protected $maximumUrlsToCompile = 10000;

    /**
     * @var IconFactory
     */
    protected $iconFactory;
221
222
    /**
     * Deprecation messages keyed by method name.
     *
     * Not referenced directly in this class; it is the lookup table read by
     * PublicMethodDeprecationTrait, so static analysers report it as unused.
     *
     * @var string[]
     */
    private $deprecatedPublicMethods = [
        'cleanUpOldQueueEntries' => 'Using CrawlerController::cleanUpOldQueueEntries() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->cleanUpOldQueueEntries() instead.',
        'CLI_buildProcessId' => 'Using CrawlerController->CLI_buildProcessId() is deprecated since 9.2.5 and will be removed in v11.x',
        'CLI_checkAndAcquireNewProcess' => 'Using CrawlerController->CLI_checkAndAcquireNewProcess() is deprecated since 9.2.5 and will be removed in v11.x',
        'CLI_debug' => 'Using CrawlerController->CLI_debug() is deprecated since 9.1.3 and will be removed in v11.x',
        'CLI_releaseProcesses' => 'Using CrawlerController->CLI_releaseProcesses() is deprecated since 9.2.2 and will be removed in v11.x',
        'CLI_run' => 'Using CrawlerController->CLI_run() is deprecated since 9.2.2 and will be removed in v11.x',
        'CLI_runHooks' => 'Using CrawlerController->CLI_runHooks() is deprecated since 9.1.5 and will be removed in v11.x',
        'getAccessMode' => 'Using CrawlerController->getAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
        'getLogEntriesForPageId' => 'Using CrawlerController->getLogEntriesForPageId() is deprecated since 9.1.5 and will be remove in v11.x',
        'getLogEntriesForSetId' => 'Using crawlerController::getLogEntriesForSetId() is deprecated since 9.0.1 and will be removed in v11.x',
        // Message previously named getLogEntriesForPageId() (copy-paste error).
        'hasGroupAccess' => 'Using CrawlerController->hasGroupAccess() is deprecated since 9.2.2 and will be remove in v11.x, please use UserService::hasGroupAccess() instead.',
        'flushQueue' => 'Using CrawlerController::flushQueue() is deprecated since 9.0.1 and will be removed in v11.x, please use QueueRepository->flushQueue() instead.',
        'setAccessMode' => 'Using CrawlerController->setAccessMode() is deprecated since 9.1.3 and will be removed in v11.x',
        'getDisabled' => 'Using CrawlerController->getDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->isDisabled() instead',
        'setDisabled' => 'Using CrawlerController->setDisabled() is deprecated since 9.1.3 and will be removed in v11.x, please use Crawler->setDisabled() instead',
        'getProcessFilename' => 'Using CrawlerController->getProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
        'setProcessFilename' => 'Using CrawlerController->setProcessFilename() is deprecated since 9.1.3 and will be removed in v11.x',
        'getDuplicateRowsIfExist' => 'Using CrawlerController->getDuplicateRowsIfExist() is deprecated since 9.1.4 and will be remove in v11.x, please use QueueRepository->getDuplicateQueueItemsIfExists() instead',
        'checkIfPageShouldBeSkipped' => 'Using CrawlerController->checkIfPageShouldBeSkipped() is deprecated since 9.2.5 and will be removed in v11.x',
    ];
246
247
    /**
     * Deprecation messages keyed by property name; the lookup table read by
     * PublicPropertyDeprecationTrait.
     *
     * @var string[]
     */
    private $deprecatedPublicProperties = [
        'accessMode' => 'Using CrawlerController->accessMode is deprecated since 9.1.3 and will be removed in v11.x',
        // Message previously named ->accessMode (copy-paste error).
        'processFilename' => 'Using CrawlerController->processFilename is deprecated since 9.1.3 and will be removed in v11.x',
    ];
254
255
    /**
     * @var BackendUserAuthentication|null
     */
    private $backendUser;

    /**
     * @var int
     */
    private $scheduledTime = 0;

    /**
     * @var int
     */
    private $reqMinute = 0;

    /**
     * @var bool
     */
    private $submitCrawlUrls = false;

    /**
     * @var bool
     */
    private $downloadCrawlUrls = false;

    /**
     * @var PageRepository
     */
    private $pageRepository;

    /**
     * @var Crawler
     */
    private $crawler;
289
290
    /************************************
291
     *
292
     * Getting URLs based on Page TSconfig
293
     *
294
     ************************************/
295
296 41
    /**
     * Instantiates the repositories and services used by the controller and
     * normalises the extension settings.
     *
     * Settings that are missing or not usable fall back to defaults:
     * 'countInARun' becomes 100 when it is not a positive integer,
     * 'processLimit' is forced into 1..99 (default 1) and
     * 'maxCompileUrls' into 1..1000000000 (default 10000).
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $crawlStrategyFactory = GeneralUtility::makeInstance(CrawlStrategyFactory::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);
        $this->processRepository = $objectManager->get(ProcessRepository::class);
        $this->configurationRepository = $objectManager->get(ConfigurationRepository::class);
        $this->pageRepository = GeneralUtility::makeInstance(PageRepository::class);
        $this->queueExecutor = GeneralUtility::makeInstance(QueueExecutor::class, $crawlStrategyFactory);
        $this->iconFactory = GeneralUtility::makeInstance(IconFactory::class);
        $this->crawler = GeneralUtility::makeInstance(Crawler::class);

        $this->processFilename = Environment::getVarPath() . '/lock/tx_crawler.proc';

        /** @var ExtensionConfigurationProvider $configurationProvider */
        $configurationProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $settings = $configurationProvider->getExtensionConfiguration();
        $this->extensionSettings = is_array($settings) ? $settings : [];

        // The settings array may be empty (see fallback above), so every key is read
        // with `??` to avoid "undefined array key" warnings; the resulting values are
        // the same ones the MathUtility helpers would have derived from null.
        if (MathUtility::convertToPositiveInteger($this->extensionSettings['countInARun'] ?? 0) === 0) {
            $this->extensionSettings['countInARun'] = 100;
        }

        $this->extensionSettings['processLimit'] = MathUtility::forceIntegerInRange(
            $this->extensionSettings['processLimit'] ?? 1,
            1,
            99,
            1
        );
        $this->setMaximumUrlsToCompile(
            MathUtility::forceIntegerInRange(
                $this->extensionSettings['maxCompileUrls'] ?? 10000,
                1,
                1000000000,
                10000
            )
        );
    }
322
323 45
    /**
     * Overrides the stored maximum number of URLs to compile.
     */
    public function setMaximumUrlsToCompile(int $maximumUrlsToCompile): void
    {
        $this->maximumUrlsToCompile = $maximumUrlsToCompile;
    }
327
328
    /**
     * Returns the current access mode ('gui', 'cli' or 'cli_im').
     *
     * @return string
     * @deprecated
     */
    public function getAccessMode()
    {
        return $this->accessMode;
    }
338
339
    /**
     * Stores the access mode.
     *
     * @param string $accessMode
     * @deprecated
     */
    public function setAccessMode($accessMode): void
    {
        $this->accessMode = $accessMode;
    }
347
348
    /**
     * Toggles the "disabled" marker file that prevents processes from being processed.
     *
     * A truthy value writes the marker file; a falsy value (false or null) removes
     * it if it exists.
     *
     * @deprecated
     */
    public function setDisabled(?bool $disabled = true): void
    {
        if ($disabled) {
            GeneralUtility::writeFile($this->processFilename, 'disabled');
            return;
        }

        if (is_file($this->processFilename)) {
            unlink($this->processFilename);
        }
    }
360
361
    /**
     * Reports whether the "disabled" marker file currently exists.
     *
     * @deprecated
     */
    public function getDisabled(): bool
    {
        return is_file($this->processFilename);
    }
369
370
    /**
     * Replaces the path of the marker file used by setDisabled() / getDisabled().
     *
     * @param string $filenameWithPath
     * @deprecated
     */
    public function setProcessFilename($filenameWithPath): void
    {
        $this->processFilename = $filenameWithPath;
    }
378
379
    /**
     * Returns the path of the marker file used by setDisabled() / getDisabled().
     *
     * @return string
     * @deprecated
     */
    public function getProcessFilename()
    {
        return $this->processFilename;
    }
387
388
    /**
     * Replaces the extension settings wholesale (unserialized pendant of
     * $TYPO3_CONF_VARS['EXT']['extConf']['crawler']).
     *
     * No normalisation is applied here, unlike in the constructor.
     */
    public function setExtensionSettings(array $extensionSettings): void
    {
        $this->extensionSettings = $extensionSettings;
    }
395
396
    /**
     * Check if the given page should be crawled; delegates to PageService.
     *
     * @return false|string false if the page should be crawled (not excluded), true / skipMessage if it should be skipped
     * @deprecated
     */
    public function checkIfPageShouldBeSkipped(array $pageRow)
    {
        return GeneralUtility::makeInstance(PageService::class)->checkIfPageShouldBeSkipped($pageRow);
    }
407
408
    /**
     * Wrapper method for getUrlsForPageId()
     * It returns an array of configurations and no urls!
     *
     * @param array $pageRow Page record with at least dok-type and uid columns.
     * @param string $skipMessage Set by reference: the reason the page was skipped, or '' when it was not.
     * @return array Configurations for the page; empty when the page is skipped or 'uid' is not an integer.
     * @see getUrlsForPageId()
     */
    public function getUrlsForPageRow(array $pageRow, &$skipMessage = '')
    {
        // A row without 'uid' is reported through $skipMessage like any other
        // non-integer uid, instead of raising an undefined-index warning first.
        $pageUid = $pageRow['uid'] ?? null;
        if (! is_int($pageUid)) {
            $skipMessage = 'PageUid ' . $pageUid . ' was not an integer';
            return [];
        }

        $message = $this->getPageService()->checkIfPageShouldBeSkipped($pageRow);
        if ($message !== false) {
            $skipMessage = $message;
            return [];
        }

        // Reset the message only after the lookup succeeded, as before.
        $configurations = $this->getUrlsForPageId($pageUid);
        $skipMessage = '';

        return $configurations;
    }
435
436
    /**
     * Creates a list of URLs from input array (and submits them to queue if asked for)
     * See Web > Info module script + "indexed_search"'s crawler hook-client using this!
     *
     * @param array $vv Information about URLs from pageRow to crawl.
     * @param array $pageRow Page row
     * @param int $scheduledTime Unix time to schedule indexing to, typically time()
     * @param int $reqMinute Number of requests per minute (creates the interleave between requests); 0 disables the interleave
     * @param bool $submitCrawlUrls If set, submits the URLs to queue
     * @param bool $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
     * @param array $duplicateTrack Array which is passed by reference and contains an id per url to ensure we will not crawl duplicates
     * @param array $downloadUrls Array which will be filled with URLS for download if flag is set.
     * @param array $incomingProcInstructions Array of processing instructions
     * @return string List of URLs (meant for display in backend module)
     */
    public function urlListFromUrlArray(
        array $vv,
        array $pageRow,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array &$duplicateTrack,
        array &$downloadUrls,
        array $incomingProcInstructions
    ) {
        if (! is_array($vv['URLs'] ?? null)) {
            return 'ERROR - no URL generated';
        }

        $urlLog = [];
        $pageId = (int) $pageRow['uid'];
        $configurationHash = $this->getConfigurationHash($vv);
        $skipInnerCheck = $this->queueRepository->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageId, $configurationHash);

        $urlService = new UrlService();

        // Optional sub-configuration values; a missing key behaves like an empty string.
        $procInstrFilter = $vv['subCfg']['procInstrFilter'] ?? '';
        $userGroups = $vv['subCfg']['userGroups'] ?? '';

        // The filter depends only on the configuration, not on the individual URL,
        // so it is evaluated once instead of per iteration.
        if (! $this->drawURLs_PIfilter($procInstrFilter, $incomingProcInstructions)) {
            return implode('<br>', $urlLog);
        }

        // Seconds between two scheduled requests. A rate of 0 would divide by zero,
        // so it is treated as "no interleave".
        $secondsPerRequest = (float) $reqMinute !== 0.0 ? 60 / $reqMinute : 0;

        foreach ($vv['URLs'] as $urlQuery) {
            $url = (string) $urlService->getUrlFromPageAndQueryParameters(
                $pageId,
                $urlQuery,
                $vv['subCfg']['baseUrl'] ?? null,
                $vv['subCfg']['force_ssl'] ?? 0
            );

            // Create key by which to determine unique-ness:
            $uKey = $url . '|' . $userGroups . '|' . $procInstrFilter;

            if (isset($duplicateTrack[$uKey])) {
                // If the url key is already registered just display it and do not resubmit it
                $urlLog[] = '<em><span class="text-muted">' . htmlspecialchars($url) . '</span></em>';
            } else {
                // Scheduled time, spread by the number of URLs seen so far and floored to the full minute:
                $schTime = $scheduledTime + round(count($duplicateTrack) * $secondsPerRequest);
                $schTime = intval($schTime / 60) * 60;
                $formattedDate = BackendUtility::datetime($schTime);
                $this->urlList[] = '[' . $formattedDate . '] ' . $url;
                $urlList = '[' . $formattedDate . '] ' . htmlspecialchars($url);

                // Submit for crawling!
                if ($submitCrawlUrls) {
                    // NOTE(review): the unmodified $scheduledTime is queued, not the
                    // interleaved $schTime shown in the log - confirm this is intended.
                    $added = $this->addUrl(
                        $pageId,
                        $url,
                        $vv['subCfg'],
                        $scheduledTime,
                        $configurationHash,
                        $skipInnerCheck
                    );
                    if ($added === false) {
                        $urlList .= ' (URL already existed)';
                    }
                } elseif ($downloadCrawlUrls) {
                    $downloadUrls[$url] = $url;
                }
                $urlLog[] = $urlList;
            }
            $duplicateTrack[$uKey] = true;
        }

        return implode('<br>', $urlLog);
    }
520
521
    /**
     * Tells whether a processing-instruction list passes the incoming filter.
     *
     * With no incoming instructions everything passes; otherwise at least one
     * incoming instruction has to be an item of the comma-separated $piString.
     *
     * @param string $piString PI to test
     * @param array $incomingProcInstructions Processing instructions
     * @return boolean
     */
    public function drawURLs_PIfilter($piString, array $incomingProcInstructions)
    {
        if ($incomingProcInstructions === []) {
            return true;
        }

        foreach ($incomingProcInstructions as $instruction) {
            if (GeneralUtility::inList($piString, $instruction)) {
                return true;
            }
        }

        return false;
    }
541
542 9
    /**
     * Returns the page TSconfig for a page, optionally altered by hooks.
     *
     * When $this->MP is set, the TSconfig is read for the page id taken from the
     * second '-'-separated segment of $this->MP instead of $id.
     *
     * Functions registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId']
     * receive 'pageId' and a reference to 'pageTSConfig' and may modify the latter.
     */
    public function getPageTSconfigForId(int $id): array
    {
        if (! $this->MP) {
            $pageTSconfig = BackendUtility::getPagesTSconfig($id);
        } else {
            // $this->MP is declared as bool but handled as a string here. The explicit
            // cast keeps explode() from failing under strict_types, and a value without
            // '-' resolves to page id 0 instead of an undefined offset.
            $mountPointParts = explode('-', (string) $this->MP);
            $mountPointId = (int) ($mountPointParts[1] ?? 0);
            $pageTSconfig = BackendUtility::getPagesTSconfig($mountPointId);
        }

        // Call a hook to alter configuration (the hook key is optional)
        $hooks = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId'] ?? null;
        if (is_array($hooks)) {
            $params = [
                'pageId' => $id,
                'pageTSConfig' => &$pageTSconfig,
            ];
            foreach ($hooks as $userFunc) {
                GeneralUtility::callUserFunction($userFunc, $params, $this);
            }
        }

        return $pageTSconfig;
    }
564
565
    /**
     * This method returns an array of configurations.
     * Adds no urls!
     *
     * Configurations are collected from two sources, in this order:
     *  1. page TSconfig (tx_crawler.crawlerCfg.paramSets), origin 'pagets';
     *  2. tx_crawler_configuration records from the rootline, which never
     *     overwrite a page TSconfig entry of the same name.
     * Finally the 'processUrls' hooks may alter the result by reference.
     *
     * @return array Configurations keyed by name, each with 'subCfg', 'paramParsed', 'paramExpanded', 'URLs' and 'origin'
     */
    public function getUrlsForPageId(int $pageId): array
    {
        // Get page TSconfig for page ID
        $pageTSconfig = $this->getPageTSconfigForId($pageId);

        $res = [];

        // Fetch Crawler Configuration from pageTSconfig
        $crawlerCfg = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
        foreach ($crawlerCfg as $key => $values) {
            if (! is_array($values)) {
                continue;
            }
            $key = str_replace('.', '', $key);
            // Sub configuration for a single configuration string:
            $subCfg = (array) ($crawlerCfg[$key . '.'] ?? []);
            $subCfg['key'] = $key;

            if (strcmp($subCfg['procInstrFilter'] ?? '', '')) {
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']));
            }

            // 'pidsOnly' is optional; a missing value means "not page-specific".
            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // process configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, strval($pageId))) {
                continue;
            }

            // Explode, process etc.:
            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            // A paramSet may consist of sub-properties only, without a value of its own.
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array($crawlerCfg[$key] ?? '');
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['origin'] = 'pagets';

            // recognize MP value
            if (! $this->MP) {
                $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $pageId]);
            } else {
                $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $pageId . '&MP=' . $this->MP]);
            }
        }

        // Get configuration from tx_crawler_configuration records up the rootline
        $crawlerConfigurations = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($pageId);
        foreach ($crawlerConfigurations as $configurationRecord) {
            // check access to the configuration record
            $hasAccess = empty($configurationRecord['begroups'])
                || $this->getBackendUser()->isAdmin()
                || UserService::hasGroupAccess($this->getBackendUser()->user['usergroup_cached_list'], $configurationRecord['begroups']);
            if (! $hasAccess) {
                continue;
            }

            $pidsOnly = (string) ($configurationRecord['pidsonly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // process configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && ! GeneralUtility::inList($pidOnlyList, strval($pageId))) {
                continue;
            }

            $key = $configurationRecord['name'];

            // don't overwrite previously defined paramSets
            if (isset($res[$key])) {
                continue;
            }

            /* @var $TSparserObject TypoScriptParser */
            $TSparserObject = GeneralUtility::makeInstance(TypoScriptParser::class);
            $TSparserObject->parse($configurationRecord['processing_instruction_parameters_ts']);

            $subCfg = [
                'procInstrFilter' => $configurationRecord['processing_instruction_filter'],
                'procInstrParams.' => $TSparserObject->setup,
                'baseUrl' => $configurationRecord['base_url'],
                'force_ssl' => (int) $configurationRecord['force_ssl'],
                'userGroups' => $configurationRecord['fegroups'],
                'exclude' => $configurationRecord['exclude'],
                'key' => $key,
            ];

            // Skip the record when the current page is on its exclude list.
            if (in_array($pageId, $this->expandExcludeString($subCfg['exclude']), true)) {
                continue;
            }

            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array($configurationRecord['configuration']);
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $pageId]);
            $res[$key]['origin'] = 'tx_crawler_configuration_' . $configurationRecord['uid'];
        }

        // Let registered hooks post-process the collected configurations.
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] ?? [] as $func) {
            $params = [
                'res' => &$res,
            ];
            GeneralUtility::callUserFunction($func, $params, $this);
        }

        return $res;
    }
663
664
    /**
     * Collects the names of the crawler configurations that apply to a page branch.
     *
     * Two sources are combined, in this order:
     * 1. the parameter sets declared under tx_crawler.crawlerCfg.paramSets in the page TSconfig of $rootid
     *    (only entries whose value is an array; one trailing dot is removed from the key),
     * 2. the "name" of every configuration record the configuration repository returns for the
     *    rootline of $rootid plus the pages of the tree below it.
     *
     * TODO: Write Functional Tests
     *
     * @param int $rootid Uid of the page the branch starts at
     * @param int $depth Depth handed to the page tree when collecting the pages below $rootid
     * @return array List of configuration names (may contain duplicates)
     */
    public function getConfigurationsForBranch(int $rootid, int $depth): array
    {
        $names = [];

        // 1) Parameter sets from page TSconfig.
        $tsConfig = $this->getPageTSconfigForId($rootid);
        $paramSets = $tsConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
        foreach ($paramSets as $setName => $setDefinition) {
            if (is_array($setDefinition)) {
                $names[] = substr($setName, -1) === '.' ? substr($setName, 0, -1) : $setName;
            }
        }

        // 2) Page uids of the rootline ...
        $pageUids = [];
        foreach (BackendUtility::BEgetRootLine($rootid) as $rootLinePage) {
            $pageUids[] = $rootLinePage['uid'];
        }

        // ... and of the pages below the root page that the backend user is allowed to see.
        /** @var PageTreeView $pageTree */
        $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
        $permissionClause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
        $pageTree->init(empty($permissionClause) ? '' : ('AND ' . $permissionClause));
        $pageTree->getTree($rootid, $depth, '');
        foreach ($pageTree->tree as $treeEntry) {
            $pageUids[] = $treeEntry['row']['uid'];
        }

        // 3) Configuration records found for those pages.
        $records = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($rootid, $pageUids);
        foreach ($records as $record) {
            $names[] = $record['name'];
        }

        return $names;
    }
700
701
    /**
     * Check if a user has access to an item
     * (e.g. get the group list of the current logged in user from $GLOBALS['TSFE']->gr_list)
     *
     * @param string $groupList Comma-separated list of (fe_)group UIDs from a user
     * @param string $accessList Comma-separated list of (fe_)group UIDs of the item to access
     * @return bool TRUE if the access list is empty or at least one of the user's group UIDs is part of it
     * @see \TYPO3\CMS\Frontend\Page\PageRepository::getMultipleGroupsWhereClause()
     * @deprecated
     * @codeCoverageIgnore
     */
    public function hasGroupAccess($groupList, $accessList)
    {
        // An item without access restriction is accessible for everybody.
        if (empty($accessList)) {
            return true;
        }

        $userGroupUids = GeneralUtility::intExplode(',', $groupList);
        foreach ($userGroupUids as $userGroupUid) {
            if (! GeneralUtility::inList($accessList, $userGroupUid)) {
                continue;
            }

            // First matching group is sufficient.
            return true;
        }

        return false;
    }
724
725
    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *        _ENABLELANG:1 picks only original records without their language overlays
     *        Further optional sub-parameters evaluated here: _RECURSIVE (tree depth), _PIDFIELD (default "pid"),
     *        _WHERE, _ADDTABLE and _FIELD (default "uid").
     *         - Default: Literal value
     *
     * Every optional sub-parameter and every optional TCA/hook entry is read defensively, so a
     * configuration that omits them does not raise "undefined array key" warnings.
     *
     * @param array $paramArray Array with key (GET var name) and values (value of GET var which is configuration for expansion)
     * @param int $pid Current page ID
     * @return array The input array with every value replaced by the list of its expanded values
     *
     * TODO: Write Functional Tests
     */
    public function expandParameters($paramArray, $pid)
    {
        // Traverse parameter names:
        foreach ($paramArray as $p => $v) {
            $v = trim($v);

            // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal
            if (strpos($v, '[') === 0 && substr($v, -1) === ']') {
                // So, find the value inside brackets and reset the paramArray value as an array.
                $v = substr($v, 1, -1);
                $paramArray[$p] = [];

                // Explode parts and traverse them:
                $parts = explode('|', $v);
                foreach ($parts as $pV) {
                    // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                    if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($pV), $reg)) {
                        $reg = $this->swapIfFirstIsLargerThanSecond($reg);

                        // Traverse range, add values:
                        // Limit to size of range!
                        $runAwayBrake = 1000;
                        for ($a = $reg[1]; $a <= $reg[2]; $a++) {
                            $paramArray[$p][] = $a;
                            $runAwayBrake--;
                            if ($runAwayBrake <= 0) {
                                break;
                            }
                        }
                    } elseif (strpos(trim($pV), '_TABLE:') === 0) {
                        // Parse parameters ("_KEY:value" pairs separated by ";"):
                        $subparts = GeneralUtility::trimExplode(';', $pV);
                        $subpartParams = [];
                        foreach ($subparts as $spV) {
                            // A sub-part without ":" yields a key with a null value instead of an undefined offset.
                            [$pKey, $pVal] = array_pad(GeneralUtility::trimExplode(':', $spV), 2, null);
                            $subpartParams[$pKey] = $pVal;
                        }

                        $table = $subpartParams['_TABLE'] ?? '';

                        // Table exists:
                        if (isset($GLOBALS['TCA'][$table])) {
                            $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : (int) $pid;
                            $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
                            $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
                            // NOTE(review): _WHERE and _ADDTABLE are handed to the query builder as raw SQL fragments
                            // taken from the configuration — confirm that only trusted users can edit it.
                            $where = $subpartParams['_WHERE'] ?? '';
                            $addTable = $subpartParams['_ADDTABLE'] ?? '';

                            // _FIELD is optional; an absent or empty value selects "uid".
                            $fieldName = ! empty($subpartParams['_FIELD']) ? $subpartParams['_FIELD'] : 'uid';
                            if ($fieldName === 'uid' || ! empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
                                $queryBuilder = $this->getQueryBuilder($table);

                                if ($recursiveDepth > 0) {
                                    /** @var QueryGenerator $queryGenerator */
                                    $queryGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
                                    $pidList = $queryGenerator->getTreeList($lookUpPid, $recursiveDepth, 0, 1);
                                    $pidArray = GeneralUtility::intExplode(',', $pidList);
                                } else {
                                    $pidArray = [(string) $lookUpPid];
                                }

                                // Only the "deleted" restriction is applied to the lookup.
                                $queryBuilder->getRestrictions()
                                    ->removeAll()
                                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

                                $queryBuilder
                                    ->select($fieldName)
                                    ->from($table)
                                    ->where(
                                        $queryBuilder->expr()->in($pidField, $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)),
                                        $where
                                    );

                                if (! empty($addTable)) {
                                    // TODO: Check if this works as intended!
                                    $queryBuilder->add('from', $addTable);
                                }

                                // Tables without translation support do not define this ctrl entry.
                                $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';

                                if (! empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
                                    $queryBuilder->andWhere(
                                        $queryBuilder->expr()->lte(
                                            $transOrigPointerField,
                                            0
                                        )
                                    );
                                }

                                $statement = $queryBuilder->execute();

                                // Index by the selected field so every value is added only once.
                                $rows = [];
                                while ($row = $statement->fetch()) {
                                    $rows[$row[$fieldName]] = $row;
                                }

                                $paramArray[$p] = array_merge($paramArray[$p], array_keys($rows));
                            }
                        }
                    } else {
                        // Just add value:
                        $paramArray[$p][] = $pV;
                    }

                    // Hook for processing own expandParameters place holder
                    $expandParametersHooks = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
                    if (is_array($expandParametersHooks)) {
                        $_params = [
                            'pObj' => &$this,
                            'paramArray' => &$paramArray,
                            'currentKey' => $p,
                            'currentValue' => $pV,
                            'pid' => $pid,
                        ];
                        foreach ($expandParametersHooks as $_funcRef) {
                            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
                        }
                    }
                }

                // Make unique set of values and sort array by key:
                $paramArray[$p] = array_unique($paramArray[$p]);
                ksort($paramArray);
            } else {
                // Set the literal value as only value in array:
                $paramArray[$p] = [$v];
            }
        }

        return $paramArray;
    }
872
873
    /**
     * Compiling URLs from parameter array (output of expandParameters())
     * The number of URLs will be the multiplication of the number of parameter values for each key,
     * capped per recursion level at the value returned by getMaximumUrlsToCompile().
     *
     * Each recursion level consumes the first parameter and appends "&name=value" (both raw-url-encoded)
     * to every URL collected so far; empty values append nothing. Integer GET var names are kept as
     * given (they are neither renumbered nor rejected).
     *
     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
     * @param array $urls URLs accumulated in this array (for recursion)
     * @return array
     */
    public function compileUrls($paramArray, array $urls)
    {
        if (empty($paramArray)) {
            return $urls;
        }

        // Take the first parameter off the array. unset() is used instead of array_shift() because
        // array_shift() renumbers integer keys, which would change the names of the remaining parameters.
        reset($paramArray);
        $varName = key($paramArray);
        $valueSet = $paramArray[$varName];
        unset($paramArray[$varName]);

        // Numeric-looking names are stored as int array keys; rawurlencode() requires a string under strict_types.
        $encodedVarName = rawurlencode((string) $varName);
        $maximumUrls = $this->getMaximumUrlsToCompile();

        // Traverse value set:
        $newUrls = [];
        foreach ($urls as $url) {
            foreach ($valueSet as $val) {
                if (count($newUrls) >= $maximumUrls) {
                    // Limit reached: nothing more can be added on this level.
                    break 2;
                }
                $val = (string) $val;
                $newUrls[] = $url . ($val !== '' ? '&' . $encodedVarName . '=' . rawurlencode($val) : '');
            }
        }

        return $this->compileUrls($paramArray, $newUrls);
    }
900
901
    /************************************
902
     *
903
     * Crawler log
904
     *
905
     ************************************/
906
907
    /**
     * Return array of records from crawler queue for input page ID
     *
     * The records are ordered by "scheduled" (descending). Note that a flush does not end the method:
     * the lookup is still executed afterwards and its result returned.
     *
     * @param int $id Page ID for which to look up log entries.
     * @param QueueFilter $queueFilter "pending" restricts to exec_time = 0, "finished" to exec_time > 0, anything else applies no restriction
     * @param bool $doFlush If TRUE, $queueFilter is handed to QueueRepository::flushQueue() before the lookup is executed
     * @param bool $doFullFlush Not used; kept so existing callers keep working
     * @param int $itemsPerPage Limit the amount of entries per page default is 10; values <= 0 disable the limit
     * @return array
     *
     * @deprecated
     */
    public function getLogEntriesForPageId($id, QueueFilter $queueFilter, $doFlush = false, $doFullFlush = false, $itemsPerPage = 10)
    {
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $queryBuilder
            ->select('*')
            ->from($this->tableName)
            ->where(
                $queryBuilder->expr()->eq('page_id', $queryBuilder->createNamedParameter($id, PDO::PARAM_INT))
            )
            ->orderBy('scheduled', 'DESC');

        // NOTE(review): the filter object is compared loosely against the string names; this presumably
        // relies on QueueFilter being string-castable — confirm against the QueueFilter class.
        switch ($queueFilter) {
            case 'pending':
                $queryBuilder->andWhere($queryBuilder->expr()->eq('exec_time', 0));
                break;
            case 'finished':
                $queryBuilder->andWhere($queryBuilder->expr()->gt('exec_time', 0));
                break;
        }

        if ($doFlush) {
            $this->queueRepository->flushQueue($queueFilter);
        }
        if ($itemsPerPage > 0) {
            $queryBuilder
                ->setMaxResults((int) $itemsPerPage);
        }

        return $queryBuilder->execute()->fetchAll();
    }
955
956
    /**
     * Return array of records from crawler queue for input set ID
     *
     * When $doFlush is set, nothing is selected: the matching entries are handed to flushQueue()
     * and an empty array is returned.
     *
     * @param int $set_id Set ID for which to look up log entries.
     * @param string $filter Filter: "all" => all entries, "pending" => all that is not yet run, "finished" => all complete ones
     * @param bool $doFlush If TRUE, then entries selected at DELETED(!) instead of selected!
     * @param bool $doFullFlush If TRUE (together with $doFlush), flushQueue() is called with an empty condition
     * @param int $itemsPerPage Limit the amount of entries per page default is 10
     * @return array
     *
     * @deprecated
     */
    public function getLogEntriesForSetId(int $set_id, string $filter = '', bool $doFlush = false, bool $doFullFlush = false, int $itemsPerPage = 10)
    {
        // Query used for reading the entries.
        $selectQuery = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);
        $selectQuery
            ->select('*')
            ->from($this->tableName)
            ->where(
                $selectQuery->expr()->eq('set_id', $selectQuery->createNamedParameter($set_id, PDO::PARAM_INT))
            )
            ->orderBy('scheduled', 'DESC');

        // AND-composite that accumulates the same constraints for a possible flush.
        $expressionBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getConnectionForTable($this->tableName)
            ->getExpressionBuilder();
        $flushConstraints = $expressionBuilder->andX();

        if ($filter === 'pending') {
            $selectQuery->andWhere($selectQuery->expr()->eq('exec_time', 0));
            $flushConstraints->add($expressionBuilder->eq('exec_time', 0));
        } elseif ($filter === 'finished') {
            $selectQuery->andWhere($selectQuery->expr()->gt('exec_time', 0));
            $flushConstraints->add($expressionBuilder->gt('exec_time', 0));
        }

        if ($doFlush) {
            $addWhere = $flushConstraints->add($expressionBuilder->eq('set_id', (int) $set_id));
            $this->flushQueue($doFullFlush ? '' : $addWhere);

            return [];
        }

        if ($itemsPerPage > 0) {
            $selectQuery->setMaxResults((int) $itemsPerPage);
        }

        return $selectQuery->execute()->fetchAll();
    }
1008
1009
    /**
     * Adding call back entries to log (called from hooks typically, see indexed search class "class.crawler.php"
     *
     * The call back reference is stored under the "_CALLBACKOBJ" key of the JSON-encoded parameters.
     *
     * @param int $setId Set ID
     * @param array $params Parameters to pass to call back function (anything that is not an array is replaced by an empty array)
     * @param string $callBack Call back object reference, eg. 'EXT:indexed_search/class.crawler.php:&tx_indexedsearch_crawler'
     * @param int $page_id Page ID to attach it to
     * @param int $schedule Time at which to activate; 0 means "now" (getCurrentTime())
     */
    public function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0): void
    {
        $callBackParameters = is_array($params) ? $params : [];
        $callBackParameters['_CALLBACKOBJ'] = $callBack;

        $queueTable = QueueRepository::TABLE_NAME;
        $connection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable($queueTable);

        $scheduledAt = (int) $schedule;
        if (! $scheduledAt) {
            $scheduledAt = $this->getCurrentTime();
        }

        $queueRow = [
            'page_id' => (int) $page_id,
            'parameters' => json_encode($callBackParameters),
            'scheduled' => $scheduledAt,
            'exec_time' => 0,
            'set_id' => (int) $setId,
            'result_data' => '',
        ];

        $connection->insert($queueTable, $queueRow);
    }
1038
1039
    /************************************
1040
     *
1041
     * URL setting
1042
     *
1043
     ************************************/
1044
1045
    /**
     * Setting a URL for crawling:
     *
     * Builds the queue record for the URL and either registers it internally only
     * ($this->registerQueueEntriesInternallyOnly) or inserts it into the queue table, unless the
     * duplication check finds an equal entry. A signal is emitted for both the "added" and the
     * "duplicate" case.
     *
     * The sub configuration keys "userGroups", "procInstrFilter", "procInstrParams." and "key" are
     * optional; missing keys are treated like empty values.
     *
     * @param int $id Page ID
     * @param string $url Complete URL
     * @param array $subCfg Sub configuration array (from TS config)
     * @param int $tstamp Scheduled-time
     * @param string $configurationHash (optional) configuration hash
     * @param bool $skipInnerDuplicationCheck (optional) skip inner duplication check
     * @return bool TRUE only if a new record was inserted into the queue table
     */
    public function addUrl(
        $id,
        $url,
        array $subCfg,
        $tstamp,
        $configurationHash = '',
        $skipInnerDuplicationCheck = false
    ) {
        $urlAdded = false;
        $rows = [];

        // Creating parameters:
        $parameters = [
            'url' => $url,
        ];

        // fe user group simulation:
        $uGs = implode(',', array_unique(GeneralUtility::intExplode(',', $subCfg['userGroups'] ?? '', true)));
        if ($uGs) {
            $parameters['feUserGroupList'] = $uGs;
        }

        // Setting processing instructions
        $parameters['procInstructions'] = GeneralUtility::trimExplode(',', $subCfg['procInstrFilter'] ?? '');
        if (is_array($subCfg['procInstrParams.'] ?? null)) {
            $parameters['procInstrParams'] = $subCfg['procInstrParams.'];
        }

        // Compile value array:
        $parameters_serialized = json_encode($parameters);
        $fieldArray = [
            'page_id' => (int) $id,
            'parameters' => $parameters_serialized,
            'parameters_hash' => GeneralUtility::shortMD5($parameters_serialized),
            'configuration_hash' => $configurationHash,
            'scheduled' => $tstamp,
            'exec_time' => 0,
            'set_id' => (int) $this->setID,
            'result_data' => '',
            'configuration' => $subCfg['key'] ?? null,
        ];

        if ($this->registerQueueEntriesInternallyOnly) {
            //the entries will only be registered and not stored to the database
            $this->queueEntries[] = $fieldArray;

            return $urlAdded;
        }

        if (! $skipInnerDuplicationCheck) {
            // check if there is already an equal entry
            $rows = $this->queueRepository->getDuplicateQueueItemsIfExists(
                (bool) ($this->extensionSettings['enableTimeslot'] ?? false),
                $tstamp,
                $this->getCurrentTime(),
                $fieldArray['page_id'],
                $fieldArray['parameters_hash']
            );
        }

        if (empty($rows)) {
            $connectionForCrawlerQueue = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable(QueueRepository::TABLE_NAME);
            $connectionForCrawlerQueue->insert(
                QueueRepository::TABLE_NAME,
                $fieldArray
            );
            $uid = $connectionForCrawlerQueue->lastInsertId(QueueRepository::TABLE_NAME, 'qid');
            $urlAdded = true;

            $signalPayload = ['uid' => $uid, 'fieldArray' => $fieldArray];
            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_URL_ADDED_TO_QUEUE,
                $signalPayload
            );
        } else {
            $signalPayload = ['rows' => $rows, 'fieldArray' => $fieldArray];
            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_DUPLICATE_URL_IN_QUEUE,
                $signalPayload
            );
        }

        return $urlAdded;
    }
1141
1142
    /**
     * Returns the current system time
     *
     * Thin wrapper around time(), used wherever this class needs "now".
     *
     * @return int Current Unix timestamp in seconds
     */
    public function getCurrentTime()
    {
        $now = time();

        return $now;
    }
1151
1152
    /************************************
1153
     *
1154
     * URL reading
1155
     *
1156
     ************************************/
1157
1158
    /**
1159
     * Read URL for single queue entry
1160
     *
1161
     * @param integer $queueId
1162
     * @param boolean $force If set, will process even if exec_time has been set!
1163
     *
1164
     * @return int|null
1165
     */
1166 2
    public function readUrl($queueId, $force = false)
    {
        // Processes one queue entry: loads the record, stamps exec_time to lock it, executes it
        // through the queue executor and stores the JSON-encoded result on the record.
        // Returns 0, or a bitmask containing CLI_STATUS_POLLABLE_PROCESSED when a registered
        // pollSuccess instruction reported success; returns null when no matching record exists.
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);
        $queryBuilder = $connectionPool->getQueryBuilderForTable(QueueRepository::TABLE_NAME);
        $ret = 0;

        // The logger may not have been injected; never let debug output break processing.
        if ($this->logger !== null) {
            $this->logger->debug('crawler-readurl start ' . microtime(true));
        }

        $queryBuilder
            ->select('*')
            ->from(QueueRepository::TABLE_NAME)
            ->where(
                $queryBuilder->expr()->eq('qid', $queryBuilder->createNamedParameter($queueId, PDO::PARAM_INT))
            );
        if (! $force) {
            // Without $force only entries that are scheduled and not yet executed are picked up.
            $queryBuilder
                ->andWhere('exec_time = 0')
                ->andWhere('process_scheduled > 0');
        }
        $queueRec = $queryBuilder->execute()->fetch();

        if (! is_array($queueRec)) {
            return null;
        }

        // Slots receive the record by reference and may modify it before execution.
        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_PREPROCESS,
            [$queueId, &$queueRec]
        );

        // Set exec_time to lock record:
        $field_array = ['exec_time' => $this->getCurrentTime()];

        if (isset($this->processID)) {
            // If multiprocessing is used we need to store the id of the process which has handled this entry
            $field_array['process_id_completed'] = $this->processID;
        }

        $queueConnection = $connectionPool->getConnectionForTable(QueueRepository::TABLE_NAME);
        $queueConnection->update(
            QueueRepository::TABLE_NAME,
            $field_array,
            ['qid' => (int) $queueId]
        );

        $result = $this->queueExecutor->executeQueueItem($queueRec, $this);

        // A null (or missing) content means the executor produced nothing to evaluate.
        if (($result['content'] ?? null) !== null) {
            /** @var JsonCompatibilityConverter $jsonCompatibilityConverter */
            $jsonCompatibilityConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
            $resultData = $jsonCompatibilityConverter->convert($result['content']);

            // atm there's no need to point to specific pollable extensions
            $pollSuccess = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pollSuccess'] ?? null;
            if (is_array($resultData) && is_array($pollSuccess)) {
                $procInstructions = $resultData['parameters']['procInstructions'] ?? null;
                foreach ($pollSuccess as $pollable) {
                    // Only check the success value if the instruction is running;
                    // the pollSuccess key must be named the same as the procInstructions key.
                    if (is_array($procInstructions)
                        && in_array($pollable, $procInstructions, true)
                        && ! empty($resultData['success'][$pollable])
                    ) {
                        $ret |= self::CLI_STATUS_POLLABLE_PROCESSED;
                    }
                }
            }
        }

        // Set result in log which also denotes the end of the processing of this entry.
        $field_array = ['result_data' => json_encode($result)];

        // Slots may adjust the fields that get written back.
        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            [$queueId, &$field_array]
        );

        $queueConnection->update(
            QueueRepository::TABLE_NAME,
            $field_array,
            ['qid' => (int) $queueId]
        );

        if ($this->logger !== null) {
            $this->logger->debug('crawler-readurl stop ' . microtime(true));
        }

        return $ret;
    }
1255
1256
    /**
1257
     * Read URL for not-yet-inserted log-entry
1258
     *
1259
     * @param array $field_array Queue field array,
1260
     *
1261
     * @return array|bool|mixed|string
1262
     */
1263
    public function readUrlFromArray($field_array)
    {
        // Inserts the given queue row (stamped with the current time as exec_time), executes it
        // right away and writes the JSON-encoded result back onto the freshly inserted record.
        // The executor's result is returned unchanged.
        $field_array['exec_time'] = $this->getCurrentTime();

        $queueTable = QueueRepository::TABLE_NAME;
        $queueConnection = GeneralUtility::makeInstance(ConnectionPool::class)->getConnectionForTable($queueTable);
        $queueConnection->insert($queueTable, $field_array);

        // The executor receives the row including its newly assigned qid.
        $insertedQid = $queueConnection->lastInsertId($queueTable, 'qid');
        $field_array['qid'] = $insertedQid;
        $executionResult = $this->queueExecutor->executeQueueItem($field_array, $this);

        // Storing the result also denotes the end of the processing of this entry.
        $resultFields = ['result_data' => json_encode($executionResult)];

        // Passed by reference so slots can adjust what gets stored.
        SignalSlotUtility::emitSignal(
            self::class,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            [$insertedQid, &$resultFields]
        );

        $queueConnection->update($queueTable, $resultFields, ['qid' => $insertedQid]);

        return $executionResult;
    }
1292
1293
    /*****************************
1294
     *
1295
     * Compiling URLs to crawl - tools
1296
     *
1297
     *****************************/
1298
1299
    /**
1300
     * @param integer $id Root page id to start from.
1301
     * @param integer $depth Depth of tree, 0=only id-page, 1= on sublevel, 99 = infinite
1302
     * @param integer $scheduledTime Unix Time when the URL is timed to be visited when put in queue
1303
     * @param integer $reqMinute Number of requests per minute (creates the interleave between requests)
1304
     * @param boolean $submitCrawlUrls If set, submits the URLs to queue in database (real crawling)
1305
     * @param boolean $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
1306
     * @param array $incomingProcInstructions Array of processing instructions
1307
     * @param array $configurationSelection Array of configuration keys
1308
     * @return string
1309
     */
1310
    public function getPageTreeAndUrls(
        $id,
        $depth,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array $incomingProcInstructions,
        array $configurationSelection
    ) {
        // Keep the request settings on the instance; drawURLs_addRowsForPage() reads them back.
        $this->scheduledTime = $scheduledTime;
        $this->reqMinute = $reqMinute;
        $this->submitCrawlUrls = $submitCrawlUrls;
        $this->downloadCrawlUrls = $downloadCrawlUrls;
        $this->incomingProcInstructions = $incomingProcInstructions;
        $this->incomingConfigurationSelection = $configurationSelection;

        // Start every run with empty trackers.
        $this->duplicateTrack = [];
        $this->downloadUrls = [];

        // Build the page tree, restricted to pages the backend user may see.
        /* @var PageTreeView $pageTree */
        $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
        $permsClause = $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW);
        $pageTree->init('AND ' . $permsClause);

        // The root page becomes the first tree entry when it is accessible.
        $rootPage = BackendUtility::readPageAccess($id, $permsClause);
        if (is_array($rootPage)) {
            $pageTree->tree[] = [
                'row' => $rootPage,
                'HTML' => $this->iconFactory->getIconForRecord('pages', $rootPage, Icon::SIZE_SMALL),
            ];
        }

        // Add the branch beneath the root page.
        if ($depth) {
            $pageTree->getTree($id, $depth, '');
        }

        // Walk the tree and collect the rendered rows in order.
        $fragments = [];
        foreach ($pageTree->tree as $treeEntry) {
            $this->MP = false;
            $pageRow = $treeEntry['row'];

            // Recognize mount points
            if ($pageRow['doktype'] === PageRepository::DOKTYPE_MOUNTPOINT) {
                $mountPage = $this->pageRepository->getPage($pageRow['uid']);
                $mountPid = $mountPage[0]['mount_pid'];

                // MP parameter "<mount_pid>-<uid>" applies while the mounted pages are rendered.
                $this->MP = $mountPid . '-' . $pageRow['uid'];

                $mountTree = GeneralUtility::makeInstance(PageTreeView::class);
                $mountTree->init('AND ' . $permsClause);
                $mountTree->getTree($mountPid, $depth);

                foreach ($mountTree->tree as $mountEntry) {
                    $fragments[] = $this->drawURLs_addRowsForPage(
                        $mountEntry['row'],
                        $mountEntry['HTML'] . BackendUtility::getRecordTitle('pages', $mountEntry['row'], true)
                    );
                }

                if ($mountPage[0]['mount_pid_ol']) {
                    // Replace page when mount_pid_ol is enabled
                    $pageRow['uid'] = $mountPid;
                } else {
                    // If mount_pid_ol is not set the MP must not be used for the mountpoint page
                    $this->MP = false;
                }
            }

            $fragments[] = $this->drawURLs_addRowsForPage(
                $pageRow,
                $treeEntry['HTML'] . BackendUtility::getRecordTitle('pages', $pageRow, true)
            );
        }

        return implode('', $fragments);
    }
1391
1392
    /**
1393
     * Expands exclude string
1394
     *
1395
     * @param string $excludeString Exclude string
1396
     * @return array
1397
     */
1398 2
    public function expandExcludeString($excludeString)
    {
        // Expands a comma-separated exclude list into a flat, de-duplicated list of integer page uids.
        // Each part is "<pid>" (page only), "<pid>+<depth>" (page plus sub tree down to <depth>)
        // or "<pid>+" (page plus sub tree, depth 99).
        //
        // Internal static caches: results per exclude string and sub trees per pid/depth.
        static $expandedExcludeStringCache;
        static $treeCache;

        // Normalise null to '' so it can be used as a cache key.
        $excludeString = (string) $excludeString;

        // isset() instead of empty(): an empty result is a valid cached value as well.
        if (! isset($expandedExcludeStringCache[$excludeString])) {
            $pidList = [];

            if (! empty($excludeString)) {
                /** @var PageTreeView $tree */
                $tree = GeneralUtility::makeInstance(PageTreeView::class);
                $tree->init('AND ' . $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW));

                foreach (GeneralUtility::trimExplode(',', $excludeString) as $excludePart) {
                    // Pad to two slots so a part without "+" does not read an undefined offset.
                    [$pid, $depth] = array_pad(GeneralUtility::trimExplode('+', $excludePart), 2, '');

                    // default is "page only" = "depth=0"; a "+" without a usable depth means depth 99
                    if (empty($depth)) {
                        $depth = strpos($excludePart, '+') !== false ? 99 : 0;
                    }

                    // getTree() expects integers; the collected uids are integers as well.
                    $pid = (int) $pid;
                    $depth = (int) $depth;

                    $pidList[] = $pid;

                    if ($depth > 0) {
                        if (! isset($treeCache[$pid][$depth])) {
                            $tree->reset();
                            $tree->getTree($pid, $depth);
                            $treeCache[$pid][$depth] = $tree->tree;
                        }

                        foreach ($treeCache[$pid][$depth] as $data) {
                            $pidList[] = (int) $data['row']['uid'];
                        }
                    }
                }
            }

            $expandedExcludeStringCache[$excludeString] = array_unique($pidList);
        }

        return $expandedExcludeStringCache[$excludeString];
    }
1443
1444
    /**
1445
     * Create the rows for display of the page tree
1446
     * For each page a number of rows are shown displaying GET variable configuration
1447
     */
1448
    public function drawURLs_addRowsForPage(array $pageRow, string $pageTitle): string
    {
        // Builds the HTML table rows for one page: one row per crawler configuration showing the
        // parsed and expanded GET parameters, the resulting URL list and the options. Pages without
        // any configuration, or excluded by a configuration, get a "No entries" row instead.
        // $pageTitle is inserted as-is (callers pass ready-made HTML).
        $skipMessage = '';

        // Get list of configurations
        $configurations = $this->getUrlsForPageRow($pageRow, $skipMessage);
        $configurations = ConfigurationService::removeDisallowedConfigurations($this->incomingConfigurationSelection, $configurations);

        // Traverse parameter combinations:
        $c = 0;
        $content = '';
        if (! empty($configurations)) {
            // expandExcludeString() returns integer uids, so the strict in_array() below needs an
            // integer needle; a uid delivered as string would otherwise never match.
            $pageUid = (int) $pageRow['uid'];

            foreach ($configurations as $confKey => $confArray) {
                // Numeric configuration keys arrive as integers; htmlspecialchars() needs a string.
                $confKeyHtml = htmlspecialchars((string) $confKey);

                // Title column: only on the first row, spanning all configuration rows.
                $titleClm = $c === 0
                    ? '<td rowspan="' . count($configurations) . '">' . $pageTitle . '</td>'
                    : '';

                if (! in_array($pageUid, $this->expandExcludeString($confArray['subCfg']['exclude'] ?? ''), true)) {

                    // URL list:
                    $urlList = $this->urlListFromUrlArray(
                        $confArray,
                        $pageRow,
                        $this->scheduledTime,
                        $this->reqMinute,
                        $this->submitCrawlUrls,
                        $this->downloadCrawlUrls,
                        $this->duplicateTrack,
                        $this->downloadUrls,
                        // if empty the urls won't be filtered by processing instructions
                        $this->incomingProcInstructions
                    );

                    // Expanded parameters:
                    $paramExpanded = '';
                    $calcAccu = [];
                    $calcRes = 1;
                    foreach ($confArray['paramExpanded'] as $gVar => $gVal) {
                        $paramExpanded .= '
                            <tr>
                                <td>' . htmlspecialchars('&' . $gVar . '=') . '<br/>' .
                            '(' . count($gVal) . ')' .
                            '</td>
                                <td nowrap="nowrap">' . nl2br(htmlspecialchars(implode(chr(10), $gVal))) . '</td>
                            </tr>
                        ';
                        // Number of combinations is the product of the value counts per parameter.
                        $calcRes *= count($gVal);
                        $calcAccu[] = count($gVal);
                    }
                    $paramExpanded = '<table>' . $paramExpanded . '</table>';
                    $paramExpanded .= 'Comb: ' . implode('*', $calcAccu) . '=' . $calcRes;

                    // Options (both keys are optional in subCfg)
                    $optionValues = '';
                    if (! empty($confArray['subCfg']['userGroups'])) {
                        $optionValues .= 'User Groups: ' . $confArray['subCfg']['userGroups'] . '<br/>';
                    }
                    if (! empty($confArray['subCfg']['procInstrFilter'])) {
                        $optionValues .= 'ProcInstr: ' . $confArray['subCfg']['procInstrFilter'] . '<br/>';
                    }

                    // Compile row:
                    $content .= '
                        <tr>
                            ' . $titleClm . '
                            <td>' . $confKeyHtml . '</td>
                            <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10) . '&', GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))) . '</td>
                            <td>' . $paramExpanded . '</td>
                            <td nowrap="nowrap">' . $urlList . '</td>
                            <td nowrap="nowrap">' . $optionValues . '</td>
                            <td nowrap="nowrap">' . DebugUtility::viewArray($confArray['subCfg']['procInstrParams.'] ?? null) . '</td>
                        </tr>';
                } else {
                    $content .= '<tr>
                            ' . $titleClm . '
                            <td>' . $confKeyHtml . '</td>
                            <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td>
                        </tr>';
                }

                $c++;
            }
        } else {
            // No configuration applies; show the skip reason when one was reported.
            $message = ! empty($skipMessage) ? ' (' . $skipMessage . ')' : '';

            // Compile row:
            $content .= '
                <tr>
                    <td>' . $pageTitle . '</td>
                    <td colspan="6"><em>No entries</em>' . $message . '</td>
                </tr>';
        }

        return $content;
    }
1547
1548
    /*****************************
1549
     *
1550
     * CLI functions
1551
     *
1552
     *****************************/
1553
1554
    /**
1555
     * Running the functionality of the CLI (crawling URLs from queue)
1556
     * @deprecated
1557
     * @codeCoverageIgnore
1558
     */
1559
    public function CLI_run(int $countInARun, int $sleepTime, int $sleepAfterFinish): int
    {
        // Crawls up to $countInARun queue entries and returns a bitmask of CLI_STATUS_* flags.
        $status = 0;
        $processedCount = 0;

        // First, run hooks:
        $hookReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];
        foreach ($hookReferences as $hookReference) {
            trigger_error(
                'This hook (crawler/cli_hooks) is deprecated since 9.1.5 and will be removed when dropping support for TYPO3 9LTS and 10LTS',
                E_USER_DEPRECATED
            );
            $hook = GeneralUtility::makeInstance($hookReference);
            if (is_object($hook)) {
                $hook->crawler_init($this);
            }
        }

        // Clean up the queue
        $this->queueRepository->cleanupQueue();

        // Select entries:
        $rows = $this->queueRepository->fetchRecordsToBeCrawled($countInARun);
        if (empty($rows)) {
            // Nothing to crawl: no flags set.
            return $status;
        }

        $queueIds = array_column($rows, 'qid');
        $processId = $this->CLI_buildProcessId();

        // Save the number of assigned queue entries to determine how many have been processed later
        $assignedCount = $this->queueRepository->updateProcessIdAndSchedulerForQueueIds($queueIds, $processId);
        $this->processRepository->updateProcessAssignItemsCount($assignedCount, $processId);

        // Abort when not every selected entry could be assigned to this process.
        if ($assignedCount !== count($queueIds)) {
            return $status | self::CLI_STATUS_ABORTED;
        }

        foreach ($rows as $row) {
            $status |= $this->readUrl($row['qid']);
            $processedCount++;

            // Just to relax the system
            usleep($sleepTime);

            // If the crawler was disabled since this run started, return right away;
            // the process is NOT marked as ended.
            if ($this->crawler->isDisabled()) {
                return $status | self::CLI_STATUS_ABORTED;
            }

            if (! $this->processRepository->isProcessActive($this->CLI_buildProcessId())) {
                // Possible timeout
                $this->CLI_debug('conflict / timeout (' . $this->CLI_buildProcessId() . ')');
                $status |= self::CLI_STATUS_ABORTED;
                break;
            }
        }

        sleep($sleepAfterFinish);

        if ($processedCount > 0) {
            $status |= self::CLI_STATUS_PROCESSED;
        }

        return $status;
    }
1629
1630
    /**
1631
     * Activate hooks
1632
     * @deprecated
1633
     * @codeCoverageIgnore
1634
     */
1635
    public function CLI_runHooks(): void
    {
        // Instantiates every class registered under crawler/cli_hooks and hands it this controller.
        $hookReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] ?? [];

        foreach ($hookReferences as $hookReference) {
            $hook = GeneralUtility::makeInstance($hookReference);
            if (! is_object($hook)) {
                continue;
            }

            $hook->crawler_init($this);
        }
    }
1644
1645
    /**
1646
     * Try to acquire a new process with the given id
1647
     * also performs some auto-cleanup for orphan processes
1648
     * @param string $id identification string for the process
1649
     * @return boolean
1650
     * @todo preemption might not be the most elegant way to clean up
1651
     * @deprecated
1652
     * @codeCoverageIgnore
1653
     */
1654
    public function CLI_checkAndAcquireNewProcess($id)
    {
        // Registers a new process under $id when the configured process limit allows it and
        // cleans up processes whose TTL has expired. Returns whether the process was registered.
        $systemProcessId = getmypid();
        if (! $systemProcessId) {
            return false;
        }

        $activeProcesses = $this->processRepository->findAllActive();
        $now = $this->getCurrentTime();

        // Split the active processes into still-running ones (counted) and expired ones (collected).
        $runningCount = 0;
        $expiredProcessIds = [];

        /** @var Process $process */
        foreach ($activeProcesses as $process) {
            if ($process->getTtl() < $now) {
                $expiredProcessIds[] = $process->getProcessId();
                continue;
            }

            $runningCount++;
        }

        // If there are fewer than the allowed number of active processes, add a new one
        $acquired = $runningCount < (int) $this->extensionSettings['processLimit'];
        if ($acquired) {
            $this->processRepository->addProcess($id, $systemProcessId);
        }

        // The cleanup runs regardless of whether the new process was registered.
        $this->processRepository->deleteProcessesMarkedAsDeleted();
        $this->processRepository->markRequestedProcessesAsNotActive($expiredProcessIds);
        $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($expiredProcessIds);

        return $acquired;
    }
1691
1692
    /**
1693
     * Release a process and the required resources
1694
     *
1695
     * @param mixed $releaseIds string with a single process-id or array with multiple process-ids
1696
     * @return boolean
1697
     * @deprecated
1698
     * @codeCoverageIgnore
1699
     */
1700
    public function CLI_releaseProcesses($releaseIds)
    {
        // Releases the given process id(s) and the queue entries bound to inactive processes.
        // Accepts a single id or an array of ids; returns false when there is nothing to release.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable($this->tableName);

        $processIds = is_array($releaseIds) ? $releaseIds : [$releaseIds];
        if (count($processIds) === 0) {
            // nothing to release
            return false;
        }

        // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup
        // this ensures that a single process can't mess up the entire process table

        // mark all processes as deleted which have no "waiting" queue entries and which are not active

        // Release queue entries that belong to inactive processes
        $inactiveProcessIds = 'q.process_id IN(SELECT p.process_id FROM tx_crawler_process as p WHERE p.active = 0)';
        $queryBuilder
            ->update(QueueRepository::TABLE_NAME, 'q')
            ->where($inactiveProcessIds)
            ->set('q.process_scheduled', 0)
            ->set('q.process_id', '')
            ->execute();

        // FIXME: Not entirely sure that this is equivalent to the previous version
        // The same builder is reused below, so the SET part of the first statement is dropped first.
        $queryBuilder->resetQueryPart('set');

        // Release process entries that are inactive but still referenced by unprocessed queue entries
        $withPendingQueueEntries = 'process_id IN(SELECT q.process_id FROM tx_crawler_queue as q WHERE q.exec_time = 0)';
        $queryBuilder
            ->update(ProcessRepository::TABLE_NAME)
            ->where(
                $queryBuilder->expr()->eq('active', 0),
                $withPendingQueueEntries
            )
            ->set('system_process_id', 0)
            ->execute();

        $this->processRepository->markRequestedProcessesAsNotActive($processIds);
        $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($processIds);

        return true;
    }
1746
1747
    /**
     * Returns the ID of the current process and creates it on first use.
     *
     * The ID is derived via GeneralUtility::shortMD5() from the current microtime and
     * stored in $this->processID, so subsequent calls return the same value.
     *
     * @return string the ID
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_buildProcessId()
    {
        // An ID has already been generated for this instance: hand it out again
        if ($this->processID) {
            return $this->processID;
        }

        $this->processID = GeneralUtility::shortMD5(microtime(true));

        return $this->processID;
    }
1761
1762
    /**
     * Prints a message to the stdout (only if debug-mode is enabled)
     *
     * Debug-mode is read from the "processDebug" extension setting; a missing setting
     * is treated as disabled.
     *
     * @param string $msg the message
     * @deprecated
     * @codeCoverageIgnore
     */
    public function CLI_debug($msg): void
    {
        // Default to 0 so an absent setting does not raise an "undefined index" notice
        $debugEnabled = (int) ($this->extensionSettings['processDebug'] ?? 0);
        if (! $debugEnabled) {
            return;
        }

        echo $msg . "\n";
        // Push the message out immediately instead of waiting for the output buffer
        flush();
    }
1776
1777
    /**
     * Cleans up entries that stayed for too long in the queue. These are:
     * - processed entries (exec_time set) older than the "cleanUpProcessedAge" extension setting
     * - entries scheduled longer ago than the "cleanUpScheduledAge" extension setting
     *
     * Both settings are multiplied by the number of seconds per day before being
     * subtracted from the current time.
     *
     * @deprecated
     */
    public function cleanUpOldQueueEntries(): void
    {
        // 24*60*60 seconds in 24 hours
        $secondsPerDay = 86400;
        $currentTimestamp = time();

        $processedThreshold = $currentTimestamp - $this->extensionSettings['cleanUpProcessedAge'] * $secondsPerDay;
        $scheduledThreshold = $currentTimestamp - $this->extensionSettings['cleanUpScheduledAge'] * $secondsPerDay;

        $this->flushQueue(
            '(exec_time<>0 AND exec_time<' . $processedThreshold . ') OR scheduled<=' . $scheduledThreshold
        );
    }
1794
1795
    /**
     * Removes queue entries
     *
     * Before the entries are deleted, SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH is emitted once per
     * distinct set_id found among the matching entries; the payload key "subSet" holds the
     * qid/set_id rows of that set which match the filter.
     *
     * @param string $where SQL related filter for the entries which should be removed,
     *                      an empty filter matches every entry
     *
     * @deprecated
     */
    protected function flushQueue($where = ''): void
    {
        // Fall back to an always-true condition, so the queries below can use the filter unconditionally
        $realWhere = strlen((string) $where) > 0 ? $where : '1=1';
        $tableName = $this->tableName;

        // A QueryBuilder keeps internal state (query parts and bound parameters),
        // therefore every statement gets its own instance.
        $groups = $this->getQueryBuilder($tableName)
            ->selectLiteral('DISTINCT set_id')
            ->from($tableName)
            ->where($realWhere)
            ->execute()
            ->fetchAll();

        foreach ($groups as $group) {
            $subSetQueryBuilder = $this->getQueryBuilder($tableName);
            $subSet = $subSetQueryBuilder
                ->select('qid', 'set_id')
                ->from($tableName)
                ->where(
                    $realWhere,
                    $subSetQueryBuilder->expr()->eq(
                        'set_id',
                        $subSetQueryBuilder->createNamedParameter($group['set_id'], PDO::PARAM_INT)
                    )
                )
                ->execute()
                ->fetchAll();

            // Inform listeners about the entries of this set before they are removed
            $payLoad = ['subSet' => $subSet];
            SignalSlotUtility::emitSignal(
                self::class,
                SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH,
                $payLoad
            );
        }

        $this->getQueryBuilder($tableName)
            ->delete($tableName)
            ->where($realWhere)
            ->execute();
    }
1840
1841
    /**
     * This method determines duplicates for a queue entry with the same parameters and this timestamp.
     * If the timestamp is not in the future, it will check if there is any unprocessed queue entry
     * scheduled up to now (with the "enableTimeslot" setting also up to 100 seconds ahead).
     * If the timestamp is in the future it will check, if the queued entry has exactly the same timestamp.
     *
     * Only entries without execution time and without assigned process are considered.
     *
     * @param int $tstamp
     * @param array $fieldArray uses the keys "page_id" and "parameters_hash"
     *
     * @return array the qid values of the duplicate entries
     * @deprecated
     */
    protected function getDuplicateRowsIfExist($tstamp, $fieldArray)
    {
        $currentTime = $this->getCurrentTime();

        $queryBuilder = $this->getQueryBuilder(QueueRepository::TABLE_NAME);
        $expr = $queryBuilder->expr();
        $queryBuilder
            ->select('qid')
            ->from(QueueRepository::TABLE_NAME);

        if ($tstamp <= $currentTime) {
            // the entry is scheduled with "now"
            if (! empty($this->extensionSettings['enableTimeslot'])) {
                $timeBegin = $currentTime - 100;
                $timeEnd = $currentTime + 100;
                $queryBuilder
                    ->where('scheduled BETWEEN ' . $timeBegin . ' AND ' . $timeEnd)
                    ->orWhere($expr->lte('scheduled', $currentTime));
            } else {
                $queryBuilder->where($expr->lte('scheduled', $currentTime));
            }
        } else {
            // entry with a timestamp in the future need to have the same schedule time
            $queryBuilder->where(
                $expr->eq('scheduled', $queryBuilder->createNamedParameter($tstamp, PDO::PARAM_INT))
            );
        }

        // Compare explicitly against the "empty" values instead of relying on "NOT <column>":
        // a boolean test on the string column process_id depends on implicit string-to-number
        // conversion and would treat ids without a non-zero leading digit as "not assigned".
        $queryBuilder
            ->andWhere($expr->eq('exec_time', 0))
            ->andWhere($expr->eq('process_id', $queryBuilder->createNamedParameter('', PDO::PARAM_STR)))
            ->andWhere($expr->eq('page_id', $queryBuilder->createNamedParameter($fieldArray['page_id'], PDO::PARAM_INT)))
            ->andWhere($expr->eq('parameters_hash', $queryBuilder->createNamedParameter($fieldArray['parameters_hash'], PDO::PARAM_STR)));

        return array_column($queryBuilder->execute()->fetchAll(), 'qid');
    }
1902
1903
    /**
     * Builds the md5 hash of the serialized configuration array.
     *
     * The keys "paramExpanded" and "URLs" are removed before hashing and therefore
     * do not influence the result.
     *
     * @return string
     */
    protected function getConfigurationHash(array $configuration)
    {
        unset($configuration['paramExpanded'], $configuration['URLs']);

        $serializedConfiguration = serialize($configuration);

        return md5($serializedConfiguration);
    }
1914
1915
    /**
     * Build a URL from a Page and the Query String. The work is delegated to
     * UrlService::getUrlFromPageAndQueryParameters(), all arguments are passed through unchanged.
     *
     * @param int $httpsOrHttp see tx_crawler_configuration.force_ssl
     * @throws SiteNotFoundException
     * @throws InvalidRouteArgumentsException
     *
     * @deprecated Using CrawlerController::getUrlFromPageAndQueryParameters() is deprecated since 9.1.1 and will be removed in v11.x, please use UrlService->getUrlFromPageAndQueryParameters() instead.
     * @codeCoverageIgnore
     */
    protected function getUrlFromPageAndQueryParameters(int $pageId, string $queryString, ?string $alternativeBaseUrl, int $httpsOrHttp): UriInterface
    {
        return (new UrlService())->getUrlFromPageAndQueryParameters(
            $pageId,
            $queryString,
            $alternativeBaseUrl,
            $httpsOrHttp
        );
    }
1931
1932 1
    /**
     * Ensures that the value at index 1 is not larger than the value at index 2.
     *
     * Both values are exchanged when index 1 holds the larger one; all other
     * elements of the array stay untouched.
     */
    protected function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        // Already in ascending order (or equal): nothing to do
        if (! ($reg[1] > $reg[2])) {
            return $reg;
        }

        [$reg[1], $reg[2]] = [$reg[2], $reg[1]];

        return $reg;
    }
1943
1944 7
    /**
     * Creates a new PageService instance on every call.
     */
    protected function getPageService(): PageService
    {
        $pageService = new PageService();

        return $pageService;
    }
1948
1949 9
    /**
     * Returns the value currently stored in $this->maximumUrlsToCompile.
     */
    private function getMaximumUrlsToCompile(): int
    {
        $maximumUrls = $this->maximumUrlsToCompile;

        return $maximumUrls;
    }
1953
1954
    /**
     * Returns the backend user, taking it from $GLOBALS['BE_USER'] on first access.
     *
     * Bootstrap::initializeBackendAuthentication() is invoked on every call, the resolved
     * user is kept in $this->backendUser afterwards.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::initializeBackendAuthentication();

        if ($this->backendUser !== null) {
            return $this->backendUser;
        }

        $this->backendUser = $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
1966
1967
    /**
     * Fetches a QueryBuilder for the given table from the ConnectionPool.
     *
     * @return QueryBuilder
     */
    private function getQueryBuilder(string $table)
    {
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
1976
}
1977