Completed
Push — issue/252 ( cbc0a1...ca7804 )
by Tomas Norre
15:31
created

CrawlerController::getProcessFilename()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 0
dl 0
loc 4
rs 10
c 0
b 0
f 0
ccs 2
cts 2
cp 1
crap 1
1
<?php
2
namespace AOE\Crawler\Controller;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Command\CrawlerCommandLineController;
29
use AOE\Crawler\Command\FlushCommandLineController;
30
use AOE\Crawler\Command\QueueCommandLineController;
31
use AOE\Crawler\Domain\Model\Reason;
32
use AOE\Crawler\Domain\Repository\QueueRepository;
33
use AOE\Crawler\Event\EventDispatcher;
34
use AOE\Crawler\Utility\IconUtility;
35
use AOE\Crawler\Utility\SignalSlotUtility;
36
use TYPO3\CMS\Backend\Utility\BackendUtility;
37
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
38
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
39
use TYPO3\CMS\Core\Database\DatabaseConnection;
40
use TYPO3\CMS\Core\Log\LogLevel;
41
use TYPO3\CMS\Core\TimeTracker\TimeTracker;
42
use TYPO3\CMS\Core\Utility\DebugUtility;
43
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
44
use TYPO3\CMS\Core\Utility\GeneralUtility;
45
use TYPO3\CMS\Core\Utility\MathUtility;
46
use TYPO3\CMS\Extbase\Object\ObjectManager;
47
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
48
use TYPO3\CMS\Frontend\Page\PageGenerator;
49
use TYPO3\CMS\Frontend\Page\PageRepository;
50
use TYPO3\CMS\Frontend\Utility\EidUtility;
51
use TYPO3\CMS\Lang\LanguageService;
52
53
/**
54
 * Class CrawlerController
55
 *
56
 * @package AOE\Crawler\Controller
57
 */
58
class CrawlerController
59
{
60
    const CLI_STATUS_NOTHING_PROCCESSED = 0;
61
    const CLI_STATUS_REMAIN = 1; //queue not empty
62
    const CLI_STATUS_PROCESSED = 2; //(some) queue items where processed
63
    const CLI_STATUS_ABORTED = 4; //instance didn't finish
64
    const CLI_STATUS_POLLABLE_PROCESSED = 8;
65
66
    /**
67
     * @var integer
68
     */
69
    public $setID = 0;
70
71
    /**
72
     * @var string
73
     */
74
    public $processID = '';
75
76
    /**
77
     * One hour is max stalled time for the CLI
78
     * If a process has had the status "start" for 3600 seconds, it is regarded as stalled and a new process is started
79
     *
80
     * @var integer
81
     */
82
    public $max_CLI_exec_time = 3600;
83
84
    /**
85
     * @var array
86
     */
87
    public $duplicateTrack = [];
88
89
    /**
90
     * @var array
91
     */
92
    public $downloadUrls = [];
93
94
    /**
95
     * @var array
96
     */
97
    public $incomingProcInstructions = [];
98
99
    /**
100
     * @var array
101
     */
102
    public $incomingConfigurationSelection = [];
103
104
    /**
105
     * @var bool
106
     */
107
    public $registerQueueEntriesInternallyOnly = false;
108
109
    /**
110
     * @var array
111
     */
112
    public $queueEntries = [];
113
114
    /**
115
     * @var array
116
     */
117
    public $urlList = [];
118
119
    /**
120
     * @var boolean
121
     */
122
    public $debugMode = false;
123
124
    /**
125
     * @var array
126
     */
127
    public $extensionSettings = [];
128
129
    /**
130
     * Mount Point
131
     *
132
     * @var boolean
133
     */
134
    public $MP = false;
135
136
    /**
137
     * @var string
138
     */
139
    protected $processFilename;
140
141
    /**
142
     * Holds the internal access mode; can be 'gui', 'cli' or 'cli_im'
143
     *
144
     * @var string
145
     */
146
    protected $accessMode;
147
148
    /**
149
     * @var DatabaseConnection
150
     */
151
    private $db;
152
153
    /**
154
     * @var BackendUserAuthentication
155
     */
156
    private $backendUser;
157
158
    /**
159
     * @var integer
160
     */
161
    private $scheduledTime = 0;
162
163
    /**
164
     * @var integer
165
     */
166
    private $reqMinute = 0;
167
168
    /**
169
     * @var bool
170
     */
171
    private $submitCrawlUrls = false;
172
173
    /**
174
     * @var bool
175
     */
176
    private $downloadCrawlUrls = false;
177
178
    /**
179
     * @var QueueRepository
180
     */
181
    protected  $queueRepository;
182
183
    /**
184
     * Method to set the accessMode can be gui, cli or cli_im
185
     *
186
     * @return string
187
     */
188 1
    public function getAccessMode()
189
    {
190 1
        return $this->accessMode;
191
    }
192
193
    /**
     * Stores the access mode on this controller.
     *
     * @param string $accessMode The access mode to remember
     */
    public function setAccessMode($accessMode)
    {
        $this->accessMode = $accessMode;
    }
200
201
    /**
     * Set disabled status to prevent processes from being processed.
     *
     * Disabling writes an empty file at $this->processFilename; enabling
     * removes that file again if it exists.
     *
     * @param  bool $disabled (optional, defaults to true)
     * @return void
     */
    public function setDisabled($disabled = true)
    {
        if ($disabled) {
            GeneralUtility::writeFile($this->processFilename, '');

            return;
        }

        // Enabling: only remove the marker file when it is actually present
        if (is_file($this->processFilename)) {
            unlink($this->processFilename);
        }
    }
217
218
    /**
     * Get disable status.
     *
     * The status is derived from the existence of the file at $this->processFilename.
     *
     * @return bool true if disabled
     */
    public function getDisabled()
    {
        return is_file($this->processFilename);
    }
231
232
    /**
     * Overrides the file used as the "disabled" marker.
     *
     * @param string $filenameWithPath File name including its path
     *
     * @return void
     */
    public function setProcessFilename($filenameWithPath)
    {
        $this->processFilename = $filenameWithPath;
    }
241
242
    /**
     * Returns the file name (including path) used as the "disabled" marker.
     *
     * @return string
     */
    public function getProcessFilename()
    {
        return $this->processFilename;
    }
249
250
    /************************************
251
     *
252
     * Getting URLs based on Page TSconfig
253
     *
254
     ************************************/
255
256 28
    /**
     * Wires up the queue repository, the global database connection and backend user,
     * the default process file name, and the extension settings including their defaults.
     */
    public function __construct()
    {
        $objectManager = GeneralUtility::makeInstance(ObjectManager::class);
        $this->queueRepository = $objectManager->get(QueueRepository::class);

        $this->db = $GLOBALS['TYPO3_DB'];
        $this->backendUser = $GLOBALS['BE_USER'];
        $this->processFilename = PATH_site . 'typo3temp/tx_crawler.proc';

        // read ext_em_conf_template settings and set
        $storedSettings = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['crawler']);
        if (!is_array($storedSettings)) {
            $storedSettings = [];
        }
        $this->setExtensionSettings($storedSettings);

        // set defaults:
        $countInARun = MathUtility::convertToPositiveInteger($this->extensionSettings['countInARun']);
        if ($countInARun == 0) {
            $this->extensionSettings['countInARun'] = 100;
        }

        $this->extensionSettings['processLimit'] = MathUtility::forceIntegerInRange(
            $this->extensionSettings['processLimit'],
            1,
            99,
            1
        );
    }
278
279
    /**
     * Replaces the extension settings held by this controller
     * (the unserialized counterpart of $TYPO3_CONF_VARS['EXT']['extConf']['crawler']).
     *
     * @param array $extensionSettings
     * @return void
     */
    public function setExtensionSettings(array $extensionSettings)
    {
        $this->extensionSettings = $extensionSettings;
    }
289
290
    /**
     * Check if the given page should be crawled.
     *
     * The checks run in this order and the first one that matches decides the result:
     * hidden page (unless 'crawlHiddenPages' is set), built-in doktype restriction,
     * configured 'excludeDoktype' lists, and finally the 'pageVeto' hooks.
     *
     * @param array $pageRow
     * @return false|string false if the page should be crawled (not excluded), skipMessage if it should be skipped
     */
    public function checkIfPageShouldBeSkipped(array $pageRow)
    {
        // if page is hidden
        if (!$this->extensionSettings['crawlHiddenPages'] && $pageRow['hidden']) {
            return 'Because page is hidden';
        }

        // doktypes 3 and 4 as well as everything from 199 upwards are never crawled
        if (GeneralUtility::inList('3,4', $pageRow['doktype']) || $pageRow['doktype'] >= 199) {
            return 'Because doktype is not allowed';
        }

        if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['excludeDoktype'])) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['excludeDoktype'] as $excludeKey => $doktypeList) {
                if (GeneralUtility::inList($doktypeList, $pageRow['doktype'])) {
                    return 'Doktype was excluded by "' . $excludeKey . '"';
                }
            }
        }

        // veto hook
        if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'])) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'] as $hookKey => $hookFunction) {
                $hookParameters = [
                    'pageRow' => $pageRow
                ];
                // expects "false" if page is ok and "true" or a skipMessage if this page should _not_ be crawled
                $veto = GeneralUtility::callUserFunction($hookFunction, $hookParameters, $this);
                if ($veto === false) {
                    continue;
                }

                // the first veto wins; remaining hooks are not executed
                return is_string($veto) ? $veto : 'Veto from hook "' . htmlspecialchars($hookKey) . '"';
            }
        }

        return false;
    }
353
354
    /**
     * Wrapper method for getUrlsForPageId()
     * It returns an array of configurations and no urls!
     *
     * @param array $pageRow Page record with at least dok-type and uid columns.
     * @param string $skipMessage Set to the skip reason when the page is skipped, otherwise to an empty string
     * @return array Configurations for the page, or an empty array when the page is skipped
     * @see getUrlsForPageId()
     */
    public function getUrlsForPageRow(array $pageRow, &$skipMessage = '')
    {
        $message = $this->checkIfPageShouldBeSkipped($pageRow);

        if ($message !== false) {
            $skipMessage = $message;

            return [];
        }

        // Page rows may carry column values as strings, in which case a strict
        // comparison against the integer 2 never matches; normalise first.
        $forceSsl = isset($pageRow['url_scheme']) && (int)$pageRow['url_scheme'] === 2;
        $res = $this->getUrlsForPageId($pageRow['uid'], $forceSsl);
        $skipMessage = '';

        return $res;
    }
378
379
    /**
     * Counts the unprocessed queue entries (exec_time=0) for a given page_id
     * whose configuration matches a given hash.
     * If there are none, an inner detail check can be skipped.
     *
     * @param  int $uid
     * @param  string $configurationHash
     * @return boolean true when no matching unprocessed entry exists
     */
    protected function noUnprocessedQueueEntriesForPageWithConfigurationHashExist($uid, $configurationHash)
    {
        $quotedHash = $this->db->fullQuoteStr($configurationHash, 'tx_crawler_queue');
        $whereClause = "page_id=" . intval($uid) . " AND configuration_hash=" . $quotedHash . " AND exec_time=0";

        $countRow = $this->db->sql_fetch_assoc(
            $this->db->exec_SELECTquery('count(*) as anz', 'tx_crawler_queue', $whereClause)
        );

        return $countRow['anz'] == 0;
    }
396
397
    /**
     * Creates a list of URLs from input array (and submits them to queue if asked for)
     * See Web > Info module script + "indexed_search"'s crawler hook-client using this!
     *
     * NOTE(review): $urlList is assigned (not appended) for every URL, so the returned
     * string only describes the last processed URL - confirm whether appending was intended.
     * NOTE(review): the interleaved schedule time is only used for display; addUrl()
     * receives the unmodified $scheduledTime - confirm this is intended.
     *
     * @param array $vv Information about URLs from pageRow to crawl.
     * @param array $pageRow Page row
     * @param integer $scheduledTime Unix time to schedule indexing to, typically time()
     * @param integer $reqMinute Number of requests per minute (creates the interleave between requests);
     *                           values that are not greater than zero disable the interleave
     * @param boolean $submitCrawlUrls If set, submits the URLs to queue
     * @param boolean $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
     * @param array $duplicateTrack Array which is passed by reference and contains an id per url to ensure we will not crawl duplicates
     * @param array $downloadUrls Array which will be filled with URLS for download if flag is set.
     * @param array $incomingProcInstructions Array of processing instructions
     * @return string List of URLs (meant for display in backend module)
     */
    public function urlListFromUrlArray(
        array $vv,
        array $pageRow,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array &$duplicateTrack,
        array &$downloadUrls,
        array $incomingProcInstructions
    ) {
        $urlList = '';

        // Decide once whether realurl encoding applies and give $urlObj a defined value
        // on every path, so the encoding step below never touches an undefined variable.
        $useRealUrl = ExtensionManagementUtility::isLoaded('realurl') && $vv['subCfg']['realurl'];
        $urlObj = null;

        // realurl support (thanks to Ingo Renner)
        if ($useRealUrl) {
            /** @var tx_realurl $urlObj */
            $urlObj = GeneralUtility::makeInstance('tx_realurl');

            if (!empty($vv['subCfg']['baseUrl'])) {
                $urlParts = parse_url($vv['subCfg']['baseUrl']);
                $host = strtolower($urlParts['host']);
                $urlObj->host = $host;

                // First pass, finding configuration OR pointer string:
                $urlObj->extConf = isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['realurl'][$urlObj->host]) ? $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['realurl'][$urlObj->host] : $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['realurl']['_DEFAULT'];

                // If it turned out to be a string pointer, then look up the real config:
                if (is_string($urlObj->extConf)) {
                    $urlObj->extConf = is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['realurl'][$urlObj->extConf]) ? $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['realurl'][$urlObj->extConf] : $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['realurl']['_DEFAULT'];
                }
            }

            if (!$GLOBALS['TSFE']->sys_page) {
                $GLOBALS['TSFE']->sys_page = GeneralUtility::makeInstance('TYPO3\CMS\Frontend\Page\PageRepository');
            }

            if (!$GLOBALS['TSFE']->tmpl->rootLine[0]['uid']) {
                $GLOBALS['TSFE']->tmpl->rootLine[0]['uid'] = $urlObj->extConf['pagePath']['rootpage_id'];
            }
        }

        if (!is_array($vv['URLs'])) {
            return 'ERROR - no URL generated';
        }

        $configurationHash = $this->getConfigurationHash($vv);
        $skipInnerCheck = $this->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageRow['uid'], $configurationHash);

        // Seconds between two requests; guarded so a rate of zero cannot cause a division by zero
        $secondsPerRequest = $reqMinute > 0 ? 60 / $reqMinute : 0;

        foreach ($vv['URLs'] as $urlQuery) {
            if (!$this->drawURLs_PIfilter($vv['subCfg']['procInstrFilter'], $incomingProcInstructions)) {
                continue;
            }

            // Calculate cHash:
            if ($vv['subCfg']['cHash']) {
                /* @var $cacheHash \TYPO3\CMS\Frontend\Page\CacheHashCalculator */
                $cacheHash = GeneralUtility::makeInstance('TYPO3\CMS\Frontend\Page\CacheHashCalculator');
                $urlQuery .= '&cHash=' . $cacheHash->generateForParameters($urlQuery);
            }

            // Create key by which to determine unique-ness:
            $uKey = $urlQuery . '|' . $vv['subCfg']['userGroups'] . '|' . $vv['subCfg']['baseUrl'] . '|' . $vv['subCfg']['procInstrFilter'];

            // realurl support (thanks to Ingo Renner)
            $urlQuery = 'index.php' . $urlQuery;
            if ($useRealUrl && $urlObj !== null) {
                $params = [
                    'LD' => [
                        'totalURL' => $urlQuery
                    ],
                    'TCEmainHook' => true
                ];
                $urlObj->encodeSpURL($params);
                $urlQuery = $params['LD']['totalURL'];
            }

            // Scheduled time, rounded down to the full minute:
            $schTime = $scheduledTime + round(count($duplicateTrack) * $secondsPerRequest);
            $schTime = (int)(floor($schTime / 60) * 60);

            if (isset($duplicateTrack[$uKey])) {
                // if the url key is already registered just display it and do not resubmit it
                $urlList = '<em><span class="typo3-dimmed">' . htmlspecialchars($urlQuery) . '</span></em><br/>';
            } else {
                $urlList = '[' . date('d.m.y H:i', $schTime) . '] ' . htmlspecialchars($urlQuery);
                $this->urlList[] = '[' . date('d.m.y H:i', $schTime) . '] ' . $urlQuery;

                $theUrl = ($vv['subCfg']['baseUrl'] ? $vv['subCfg']['baseUrl'] : GeneralUtility::getIndpEnv('TYPO3_SITE_URL')) . $urlQuery;

                // Submit for crawling!
                if ($submitCrawlUrls) {
                    $added = $this->addUrl(
                        $pageRow['uid'],
                        $theUrl,
                        $vv['subCfg'],
                        $scheduledTime,
                        $configurationHash,
                        $skipInnerCheck
                    );
                    if ($added === false) {
                        $urlList .= ' (Url already existed)';
                    }
                } elseif ($downloadCrawlUrls) {
                    $downloadUrls[$theUrl] = $theUrl;
                }

                $urlList .= '<br />';
            }
            $duplicateTrack[$uKey] = true;
        }

        return $urlList;
    }
526
527
    /**
528
     * Returns true if input processing instruction is among registered ones.
529
     *
530
     * @param string $piString PI to test
531
     * @param array $incomingProcInstructions Processing instructions
532
     * @return boolean
533
     */
534 5
    public function drawURLs_PIfilter($piString, array $incomingProcInstructions)
535
    {
536 5
        if (empty($incomingProcInstructions)) {
537 1
            return true;
538
        }
539
540 4
        foreach ($incomingProcInstructions as $pi) {
541 4
            if (GeneralUtility::inList($piString, $pi)) {
542 4
                return true;
543
            }
544
        }
545 2
    }
546
547 4
    /**
     * Fetches the page TSconfig for a page and lets registered hooks alter it.
     *
     * When $this->MP is set, the TSconfig is read for the second "-"-separated
     * segment of $this->MP instead of for $id.
     *
     * @param mixed $id Page ID; passed on to BackendUtility::getPagesTSconfig() and to the hooks
     * @return mixed The (possibly hook-modified) result of BackendUtility::getPagesTSconfig()
     */
    public function getPageTSconfigForId($id)
    {
        $tsConfigPageId = $id;
        if ($this->MP) {
            list(, $tsConfigPageId) = explode('-', $this->MP);
        }
        $pageTSconfig = BackendUtility::getPagesTSconfig($tsConfigPageId);

        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId'])) {
            return $pageTSconfig;
        }

        // Call a hook to alter configuration; the TSconfig is handed over by reference
        $params = [
            'pageId' => $id,
            'pageTSConfig' => &$pageTSconfig
        ];
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['getPageTSconfigForId'] as $userFunc) {
            GeneralUtility::callUserFunction($userFunc, $params, $this);
        }

        return $pageTSconfig;
    }
569
570
    /**
571
     * This methods returns an array of configurations.
572
     * And no urls!
573
     *
574
     * @param integer $id Page ID
575
     * @param bool $forceSsl Use https
576
     * @return array
577
     *
578
     * TODO: Should be switched back to protected - TNM 2018-11-16
579
     */
580 4
    public function getUrlsForPageId($id, $forceSsl = false)
581
    {
582
583
        /**
584
         * Get configuration from tsConfig
585
         */
586
587
        // Get page TSconfig for page ID:
588 4
        $pageTSconfig = $this->getPageTSconfigForId($id);
589
590 4
        $res = [];
591
592 4
        if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.'])) {
593 3
            $crawlerCfg = $pageTSconfig['tx_crawler.']['crawlerCfg.'];
594
595 3
            if (is_array($crawlerCfg['paramSets.'])) {
596 3
                foreach ($crawlerCfg['paramSets.'] as $key => $values) {
597 3
                    if (is_array($values)) {
598 3
                        $key = str_replace('.', '', $key);
599
                        // Sub configuration for a single configuration string:
600 3
                        $subCfg = (array)$crawlerCfg['paramSets.'][$key . '.'];
601 3
                        $subCfg['key'] = $key;
602
603 3
                        if (strcmp($subCfg['procInstrFilter'], '')) {
604 3
                            $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']));
605
                        }
606 3
                        $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $subCfg['pidsOnly'], true));
607
608
                        // process configuration if it is not page-specific or if the specific page is the current page:
609 3
                        if (!strcmp($subCfg['pidsOnly'], '') || GeneralUtility::inList($pidOnlyList, $id)) {
610
611
                                // add trailing slash if not present
612 3
                            if (!empty($subCfg['baseUrl']) && substr($subCfg['baseUrl'], -1) != '/') {
613
                                $subCfg['baseUrl'] .= '/';
614
                            }
615
616
                            // Explode, process etc.:
617 3
                            $res[$key] = [];
618 3
                            $res[$key]['subCfg'] = $subCfg;
619 3
                            $res[$key]['paramParsed'] = $this->parseParams($crawlerCfg['paramSets.'][$key]);
620 3
                            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id);
621 3
                            $res[$key]['origin'] = 'pagets';
622
623
                            // recognize MP value
624 3
                            if (!$this->MP) {
625 3
                                $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $id]);
626
                            } else {
627 3
                                $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $id . '&MP=' . $this->MP]);
628
                            }
629
                        }
630
                    }
631
                }
632
            }
633
        }
634
635
        /**
636
         * Get configuration from tx_crawler_configuration records
637
         */
638
639
        // get records along the rootline
640 4
        $rootLine = BackendUtility::BEgetRootLine($id);
641
642 4
        foreach ($rootLine as $page) {
643 4
            $configurationRecordsForCurrentPage = BackendUtility::getRecordsByField(
0 ignored issues
show
Deprecated Code introduced by
The method TYPO3\CMS\Backend\Utilit...ty::getRecordsByField() has been deprecated with message: since TYPO3 v8, will be removed in TYPO3 v9

This method has been deprecated. The supplier of the class has supplied an explanatory message.

The explanatory message should give you some clue as to whether and when the method will be removed from the class and what other method or class to use instead.

Loading history...
644 4
                'tx_crawler_configuration',
645 4
                'pid',
646 4
                intval($page['uid']),
647 4
                BackendUtility::BEenableFields('tx_crawler_configuration') . BackendUtility::deleteClause('tx_crawler_configuration')
648
            );
649
650 4
            if (is_array($configurationRecordsForCurrentPage)) {
651 1
                foreach ($configurationRecordsForCurrentPage as $configurationRecord) {
652
653
                        // check access to the configuration record
654 1
                    if (empty($configurationRecord['begroups']) || $GLOBALS['BE_USER']->isAdmin() || $this->hasGroupAccess($GLOBALS['BE_USER']->user['usergroup_cached_list'], $configurationRecord['begroups'])) {
655 1
                        $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $configurationRecord['pidsonly'], true));
656
657
                        // process configuration if it is not page-specific or if the specific page is the current page:
658 1
                        if (!strcmp($configurationRecord['pidsonly'], '') || GeneralUtility::inList($pidOnlyList, $id)) {
659 1
                            $key = $configurationRecord['name'];
660
661
                            // don't overwrite previously defined paramSets
662 1
                            if (!isset($res[$key])) {
663
664
                                    /* @var $TSparserObject \TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser */
665 1
                                $TSparserObject = GeneralUtility::makeInstance('TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser');
666 1
                                $TSparserObject->parse($configurationRecord['processing_instruction_parameters_ts']);
667
668 1
                                $isCrawlingProtocolHttps = $this->isCrawlingProtocolHttps($configurationRecord['force_ssl'], $forceSsl);
669
670
                                $subCfg = [
671 1
                                    'procInstrFilter' => $configurationRecord['processing_instruction_filter'],
672 1
                                    'procInstrParams.' => $TSparserObject->setup,
673 1
                                    'baseUrl' => $this->getBaseUrlForConfigurationRecord(
674 1
                                        $configurationRecord['base_url'],
675 1
                                        $configurationRecord['sys_domain_base_url'],
676 1
                                        $isCrawlingProtocolHttps
677
                                    ),
678 1
                                    'realurl' => $configurationRecord['realurl'],
679 1
                                    'cHash' => $configurationRecord['chash'],
680 1
                                    'userGroups' => $configurationRecord['fegroups'],
681 1
                                    'exclude' => $configurationRecord['exclude'],
682 1
                                    'rootTemplatePid' => (int) $configurationRecord['root_template_pid'],
683 1
                                    'key' => $key
684
                                ];
685
686
                                // add trailing slash if not present
687 1
                                if (!empty($subCfg['baseUrl']) && substr($subCfg['baseUrl'], -1) != '/') {
688
                                    $subCfg['baseUrl'] .= '/';
689
                                }
690 1
                                if (!in_array($id, $this->expandExcludeString($subCfg['exclude']))) {
691 1
                                    $res[$key] = [];
692 1
                                    $res[$key]['subCfg'] = $subCfg;
693 1
                                    $res[$key]['paramParsed'] = $this->parseParams($configurationRecord['configuration']);
694 1
                                    $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id);
695 1
                                    $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], ['?id=' . $id]);
696 4
                                    $res[$key]['origin'] = 'tx_crawler_configuration_' . $configurationRecord['uid'];
697
                                }
698
                            }
699
                        }
700
                    }
701
                }
702
            }
703
        }
704
705 4
        if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'])) {
706
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] as $func) {
707
                $params = [
708
                    'res' => &$res,
709
                ];
710
                GeneralUtility::callUserFunction($func, $params, $this);
711
            }
712
        }
713
714 4
        return $res;
715
    }
716
717
    /**
     * Resolves the base URL to use for a crawler configuration record.
     *
     * If $sysDomainUid is greater than zero and an enabled, non-deleted sys_domain record with a
     * non-empty "domainName" exists for it, the URL is built from the scheme and that domain name.
     * In every other case the given $baseUrl is returned unchanged.
     *
     * @param string $baseUrl Fallback base URL
     * @param integer $sysDomainUid UID of the sys_domain record to look up; values <= 0 skip the lookup
     * @param bool $ssl Anything other than boolean FALSE selects the "https" scheme
     * @return string
     */
    protected function getBaseUrlForConfigurationRecord($baseUrl, $sysDomainUid, $ssl = false)
    {
        $sysDomainUid = (int)$sysDomainUid;
        if ($sysDomainUid <= 0) {
            return $baseUrl;
        }

        $res = $this->db->exec_SELECTquery(
            '*',
            'sys_domain',
            'uid = ' . $sysDomainUid .
            BackendUtility::BEenableFields('sys_domain') .
            BackendUtility::deleteClause('sys_domain')
        );
        $row = $this->db->sql_fetch_assoc($res);
        if ($res) {
            // Release the result set; only one row is ever needed.
            $this->db->sql_free_result($res);
        }

        // sql_fetch_assoc() yields a non-array when nothing matched, so check before indexing.
        if (is_array($row) && isset($row['domainName']) && $row['domainName'] != '') {
            $urlScheme = ($ssl === false) ? 'http' : 'https';
            return $urlScheme . '://' . $row['domainName'];
        }

        return $baseUrl;
    }
745
746
    /**
     * Collects the names of the crawler configurations available for a page branch.
     *
     * Two sources are combined:
     * - the keys below "tx_crawler.crawlerCfg.paramSets." in the Page TSconfig of $rootid
     *   (only keys holding an array; a trailing dot is stripped)
     * - the "name" of every tx_crawler_configuration record whose pid is a page of the rootline
     *   of $rootid or a page of the tree below $rootid (read with the backend user's page
     *   permission clause)
     *
     * @param integer $rootid UID of the page the branch starts at
     * @param integer $depth Number of levels passed to the page tree when collecting sub pages
     * @return array List of configuration names
     */
    public function getConfigurationsForBranch($rootid, $depth)
    {
        $configurationsForBranch = [];

        $pageTSconfig = $this->getPageTSconfigForId($rootid);
        if (is_array($pageTSconfig)
            && isset($pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'])
            && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'])
        ) {
            foreach ($pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'] as $key => $value) {
                if (!is_array($value)) {
                    continue;
                }
                $configurationsForBranch[] = substr($key, -1) == '.' ? substr($key, 0, -1) : $key;
            }
        }

        // Collect page uids of the rootline and of the subtree; cast to int as they end up in SQL.
        $pids = [];
        $rootLine = BackendUtility::BEgetRootLine($rootid);
        foreach ($rootLine as $node) {
            $pids[] = (int)$node['uid'];
        }
        /* @var PageTreeView $tree */
        $tree = GeneralUtility::makeInstance(PageTreeView::class);
        $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
        $tree->init('AND ' . $perms_clause);
        $tree->getTree($rootid, $depth, '');
        foreach ($tree->tree as $node) {
            $pids[] = (int)$node['row']['uid'];
        }
        $pids = array_unique($pids);

        // "pid IN ()" is not valid SQL, so skip the record lookup when no page was found.
        if (empty($pids)) {
            return $configurationsForBranch;
        }

        $res = $this->db->exec_SELECTquery(
            '*',
            'tx_crawler_configuration',
            'pid IN (' . implode(',', $pids) . ') ' .
            BackendUtility::BEenableFields('tx_crawler_configuration') .
            BackendUtility::deleteClause('tx_crawler_configuration') . ' ' .
            BackendUtility::versioningPlaceholderClause('tx_crawler_configuration') . ' '
        );

        if ($res) {
            while ($row = $this->db->sql_fetch_assoc($res)) {
                $configurationsForBranch[] = $row['name'];
            }
            $this->db->sql_free_result($res);
        }

        return $configurationsForBranch;
    }
791
792
    /**
     * Tells whether a user with the given groups may access an item
     * (e.g. pass the group list of the current logged in user from $GLOBALS['TSFE']->gr_list)
     *
     * @see \TYPO3\CMS\Frontend\Page\PageRepository::getMultipleGroupsWhereClause()
     * @param  string $groupList    Comma-separated list of (fe_)group UIDs from a user
     * @param  string $accessList   Comma-separated list of (fe_)group UIDs of the item to access
     * @return bool                 TRUE if the access list is empty or contains at least one of the user's group UIDs
     */
    public function hasGroupAccess($groupList, $accessList)
    {
        // An item without an access list is open to everybody.
        $accessGranted = empty($accessList);

        if (!$accessGranted) {
            $userGroupUids = GeneralUtility::intExplode(',', $groupList);
            foreach ($userGroupUids as $userGroupUid) {
                if (GeneralUtility::inList($accessList, $userGroupUid)) {
                    $accessGranted = true;
                    break;
                }
            }
        }

        return $accessGranted;
    }
813
814
    /**
     * Parse GET vars of input Query into array with key=>value pairs
     *
     * The query is split at "&", each part at the first "=". Names and values are raw-url-decoded.
     * Parts with an empty name are dropped; a part without "=" results in an empty string value.
     * If a name occurs more than once the last occurrence wins.
     *
     * @param string $inputQuery Input query string
     * @return array
     */
    public function parseParams($inputQuery)
    {
        $paramKeyValues = [];

        foreach (explode('&', $inputQuery) as $paramAndValue) {
            // Pad to two elements so a parameter without "=" does not trigger an undefined offset.
            list($name, $value) = array_pad(explode('=', $paramAndValue, 2), 2, '');
            if (strlen($name)) {
                $paramKeyValues[rawurldecode($name)] = rawurldecode($value);
            }
        }

        return $paramKeyValues;
    }
835
836
    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *           _ENABLELANG:1 picks only original records without their language overlays
     *           Further sub-parts read by the code: _PIDFIELD (default "pid"), _FIELD (default "uid"),
     *           _WHERE (appended to the WHERE clause as is), _ADDTABLE (appended to the table name as is)
     *         - Default: Literal value
     * - After each part the hook "expandParameters" registered below
     *   $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php'] is called
     *
     * @param array $paramArray Array with key (GET var name) and values (value of GET var which is configuration for expansion)
     * @param integer $pid Current page ID
     * @return array Same keys as the input, each holding an array of values
     */
    public function expandParameters($paramArray, $pid)
    {
        $hookScope = 'crawler/class.tx_crawler_lib.php';

        // Traverse parameter names:
        foreach ($paramArray as $p => $v) {
            $v = trim($v);

            // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal
            if (substr($v, 0, 1) === '[' && substr($v, -1) === ']') {
                // So, find the value inside brackets and reset the paramArray value as an array.
                $v = substr($v, 1, -1);
                $paramArray[$p] = [];

                // Explode parts and traverse them:
                $parts = explode('|', $v);
                foreach ($parts as $pV) {

                    // Look for integer range: (e.g. 1-34 or -40--30 // reads minus 40 to minus 30)
                    if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($pV), $reg)) {

                        // Swap if first is larger than last:
                        if ($reg[1] > $reg[2]) {
                            $temp = $reg[2];
                            $reg[2] = $reg[1];
                            $reg[1] = $temp;
                        }

                        // Traverse range, add values:
                        $runAwayBrake = 1000; // Limit to size of range!
                        for ($a = $reg[1]; $a <= $reg[2]; $a++) {
                            $paramArray[$p][] = $a;
                            $runAwayBrake--;
                            if ($runAwayBrake <= 0) {
                                break;
                            }
                        }
                    } elseif (substr(trim($pV), 0, 7) == '_TABLE:') {

                        // Parse parameters ("KEY:value" pairs separated by ";"):
                        $subpartParams = [];
                        foreach (GeneralUtility::trimExplode(';', $pV) as $spV) {
                            // Pad so that a sub-part without ":" yields a NULL value instead of an undefined offset
                            list($pKey, $pVal) = array_pad(GeneralUtility::trimExplode(':', $spV), 2, null);
                            $subpartParams[$pKey] = $pVal;
                        }
                        $table = isset($subpartParams['_TABLE']) ? $subpartParams['_TABLE'] : '';

                        // Table exists:
                        if (isset($GLOBALS['TCA'][$table])) {
                            $lookUpPid = isset($subpartParams['_PID']) ? intval($subpartParams['_PID']) : $pid;
                            $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
                            $extraWhere = isset($subpartParams['_WHERE']) ? $subpartParams['_WHERE'] : '';
                            $addTable = isset($subpartParams['_ADDTABLE']) ? $subpartParams['_ADDTABLE'] : '';

                            $fieldName = !empty($subpartParams['_FIELD']) ? $subpartParams['_FIELD'] : 'uid';
                            if ($fieldName === 'uid' || !empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
                                $andWhereLanguage = '';
                                $transOrigPointerField = isset($GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'])
                                    ? $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField']
                                    : null;

                                if (!empty($subpartParams['_ENABLELANG']) && $transOrigPointerField) {
                                    $andWhereLanguage = ' AND ' . $this->db->quoteStr($transOrigPointerField, $table) . ' <= 0 ';
                                }

                                $where = $this->db->quoteStr($pidField, $table) . '=' . intval($lookUpPid) . ' ' .
                                    $andWhereLanguage . $extraWhere;

                                // The last argument indexes the result by $fieldName, so the keys are the values looked for.
                                $rows = $this->db->exec_SELECTgetRows(
                                    $fieldName,
                                    $table . $addTable,
                                    $where . BackendUtility::deleteClause($table),
                                    '',
                                    '',
                                    '',
                                    $fieldName
                                );

                                if (is_array($rows)) {
                                    $paramArray[$p] = array_merge($paramArray[$p], array_keys($rows));
                                }
                            }
                        }
                    } else { // Just add value:
                        $paramArray[$p][] = $pV;
                    }

                    // Hook for processing own expandParameters place holder
                    if (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS'][$hookScope]['expandParameters'])
                        && is_array($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS'][$hookScope]['expandParameters'])
                    ) {
                        $_params = [
                            'pObj' => &$this,
                            'paramArray' => &$paramArray,
                            'currentKey' => $p,
                            'currentValue' => $pV,
                            'pid' => $pid
                        ];
                        foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS'][$hookScope]['expandParameters'] as $_funcRef) {
                            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
                        }
                    }
                }

                // Make unique set of values and sort array by key:
                $paramArray[$p] = array_unique($paramArray[$p]);
                ksort($paramArray);
            } else {
                // Set the literal value as only value in array:
                $paramArray[$p] = [$v];
            }
        }

        return $paramArray;
    }
961
962
    /**
     * Compiling URLs from parameter array (output of expandParameters())
     * The number of URLs will be the multiplication of the number of parameter values for each key,
     * capped by the extension setting "maxCompileUrls": compiling one parameter stops as soon as
     * the number of URLs built for it exceeds that limit.
     *
     * A value that equals the empty string adds nothing to the URL.
     *
     * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values
     * @param array $urls URLs accumulated in this array (for recursion)
     * @return array
     */
    public function compileUrls($paramArray, $urls = [])
    {
        if (is_array($paramArray) && count($paramArray) && is_array($urls)) {
            // The limit does not change while compiling, so resolve it once.
            $maxCompileUrls = MathUtility::forceIntegerInRange(
                isset($this->extensionSettings['maxCompileUrls']) ? $this->extensionSettings['maxCompileUrls'] : null,
                1,
                1000000000,
                10000
            );

            // shift first off stack:
            reset($paramArray);
            $varName = key($paramArray);
            $valueSet = array_shift($paramArray);

            // Traverse value set:
            $newUrls = [];
            foreach ($urls as $url) {
                foreach ($valueSet as $val) {
                    $newUrls[] = $url . (strcmp($val, '') ? '&' . rawurlencode($varName) . '=' . rawurlencode($val) : '');

                    if (count($newUrls) > $maxCompileUrls) {
                        // Leave both loops: breaking only the inner one would let every
                        // remaining base URL add one more entry beyond the limit.
                        break 2;
                    }
                }
            }

            // Continue with the remaining parameters:
            $urls = $this->compileUrls($paramArray, $newUrls);
        }

        return $urls;
    }
995
996
    /************************************
997
     *
998
     * Crawler log
999
     *
1000
     ************************************/
1001
1002
    /**
     * Returns the crawler queue records of a page - or deletes them instead when flushing is requested
     *
     * @param integer $id Page ID for which to look up log entries.
     * @param string $filter Filter: "pending" => all that is not yet run, "finished" => all complete ones, anything else => all entries
     * @param boolean $doFlush If TRUE, the matching entries are DELETED(!) instead of selected and an empty array is returned
     * @param boolean $doFullFlush Only used when flushing: if TRUE, the entries of all pages are flushed (the filter still applies)
     * @param integer $itemsPerPage Limit the amount of entries per page, default is 10; values <= 0 disable the limit
     * @return array
     */
    public function getLogEntriesForPageId($id, $filter = '', $doFlush = false, $doFullFlush = false, $itemsPerPage = 10)
    {
        // Loose comparison on purpose, it mirrors the semantics of a switch statement.
        if ($filter == 'pending') {
            $execTimeClause = ' AND exec_time=0';
        } elseif ($filter == 'finished') {
            $execTimeClause = ' AND exec_time>0';
        } else {
            $execTimeClause = '';
        }

        $pageClause = 'page_id=' . intval($id);

        // FIXME: Write unit test that ensures that the right records are deleted.
        if ($doFlush) {
            $scopeClause = $doFullFlush ? '1=1' : $pageClause;
            $this->flushQueue($scopeClause . $execTimeClause);

            return [];
        }

        $limit = intval($itemsPerPage);

        return $this->db->exec_SELECTgetRows(
            '*',
            'tx_crawler_queue',
            $pageClause . $execTimeClause,
            '',
            'scheduled DESC',
            $limit > 0 ? $limit : ''
        );
    }
1041
1042
    /**
     * Returns the crawler queue records of a set - or deletes them instead when flushing is requested
     *
     * @param integer $set_id Set ID for which to look up log entries.
     * @param string $filter Filter: "pending" => all that is not yet run, "finished" => all complete ones, anything else => all entries
     * @param boolean $doFlush If TRUE, the matching entries are DELETED(!) instead of selected and an empty array is returned
     * @param boolean $doFullFlush Only used when flushing: if TRUE, the whole queue is flushed, regardless of set and filter
     * @param integer $itemsPerPage Limit the amount of entries per page, default is 10; values <= 0 disable the limit
     * @return array
     */
    public function getLogEntriesForSetId($set_id, $filter = '', $doFlush = false, $doFullFlush = false, $itemsPerPage = 10)
    {
        // FIXME: Write Unit tests for Filters
        // Loose comparison on purpose, it mirrors the semantics of a switch statement.
        $execTimeClause = '';
        if ($filter == 'pending') {
            $execTimeClause = ' AND exec_time=0';
        } elseif ($filter == 'finished') {
            $execTimeClause = ' AND exec_time>0';
        }

        $setClause = 'set_id=' . intval($set_id) . $execTimeClause;

        // FIXME: Write unit test that ensures that the right records are deleted.
        if ($doFlush) {
            // An empty clause makes flushQueue() remove every queue entry.
            $this->flushQueue($doFullFlush ? '' : $setClause);

            return [];
        }

        $limit = intval($itemsPerPage);
        if ($limit <= 0) {
            $limit = '';
        }

        return $this->db->exec_SELECTgetRows(
            '*',
            'tx_crawler_queue',
            $setClause,
            '',
            'scheduled DESC',
            $limit
        );
    }
1080
1081
    /**
     * Removes queue entries
     *
     * Before the entries are deleted, the rows about to be removed are announced once per set_id:
     * to the (deprecated) event dispatcher as "queueEntryFlush" and as the
     * SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH signal. The rows are only read when at least one
     * of the two has a listener.
     *
     * @param string $where SQL related filter for the entries which should be removed; an empty string removes all entries
     * @return void
     */
    protected function flushQueue($where = '')
    {
        $realWhere = strlen($where) > 0 ? $where : '1=1';

        // The event dispatcher is deprecated since crawler v6.4.0, will be removed in crawler v7.0.0.
        // Please use the Signal instead.
        $notifyObservers = EventDispatcher::getInstance()->hasObserver('queueEntryFlush');
        $notifySlots = SignalSlotUtility::hasSignal(__CLASS__, SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH);

        if ($notifyObservers || $notifySlots) {
            $groups = $this->db->exec_SELECTgetRows('DISTINCT set_id', 'tx_crawler_queue', $realWhere);
            if (is_array($groups)) {
                foreach ($groups as $group) {
                    $setId = (int)$group['set_id'];

                    // Read the rows of this set once and share them between both notifications.
                    // The caller's clause is parenthesised so that an OR inside it cannot escape the set filter.
                    // NOTE(review): other queries on tx_crawler_queue in this class use "qid" as key column;
                    // confirm that the table really has a "uid" column, otherwise this query yields no rows.
                    $flushedEntries = $this->db->exec_SELECTgetRows(
                        'uid, set_id',
                        'tx_crawler_queue',
                        '(' . $realWhere . ') AND set_id=' . $setId
                    );
                    if (!is_array($flushedEntries)) {
                        // The query result is not an array on failure; listeners expect an array.
                        $flushedEntries = [];
                    }

                    if ($notifyObservers) {
                        EventDispatcher::getInstance()->post('queueEntryFlush', $group['set_id'], $flushedEntries);
                    }

                    if ($notifySlots) {
                        SignalSlotUtility::emitSignal(
                            __CLASS__,
                            SignalSlotUtility::SIGNAL_QUEUE_ENTRY_FLUSH,
                            $flushedEntries
                        );
                    }
                }
            }
        }

        // NOTE(review): uses the global connection while the queries above use $this->db - presumably the same object.
        $GLOBALS['TYPO3_DB']->exec_DELETEquery('tx_crawler_queue', $realWhere);
    }
1120
1121
    /**
     * Adds a call back entry to the queue (called from hooks typically, see indexed search class "class.crawler.php")
     *
     * The call back reference is stored in the serialized parameters under the key "_CALLBACKOBJ".
     *
     * @param integer $setId Set ID
     * @param array $params Parameters to pass to call back function; anything that is not an array is replaced by an empty array
     * @param string $callBack Call back object reference, eg. 'EXT:indexed_search/class.crawler.php:&tx_indexedsearch_crawler'
     * @param integer $page_id Page ID to attach it to
     * @param integer $schedule Time at which to activate; 0 means the current time
     * @return void
     */
    public function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0)
    {
        $callBackParameters = is_array($params) ? $params : [];
        $callBackParameters['_CALLBACKOBJ'] = $callBack;

        $pageUid = intval($page_id);
        $serializedParameters = serialize($callBackParameters);

        // Fall back to "now" when no schedule time was given.
        $scheduledAt = intval($schedule);
        if (!$scheduledAt) {
            $scheduledAt = $this->getCurrentTime();
        }

        $this->db->exec_INSERTquery(
            'tx_crawler_queue',
            [
                'page_id' => $pageUid,
                'parameters' => $serializedParameters,
                'scheduled' => $scheduledAt,
                'exec_time' => 0,
                'set_id' => intval($setId),
                'result_data' => '',
            ]
        );
    }
1150
1151
    /************************************
1152
     *
1153
     * URL setting
1154
     *
1155
     ************************************/
1156
1157
    /**
     * Puts a URL into the crawler queue.
     *
     * Depending on $this->registerQueueEntriesInternallyOnly the entry is either only collected in
     * $this->queueEntries or written to tx_crawler_queue. In the latter case nothing is written when
     * the duplication check finds an equal entry. Both outcomes are announced through the
     * (deprecated) event dispatcher and through a signal.
     *
     * @param integer $id Page ID
     * @param string $url Complete URL
     * @param array $subCfg Sub configuration array (from TS config)
     * @param integer $tstamp Scheduled-time
     * @param string $configurationHash (optional) configuration hash
     * @param bool $skipInnerDuplicationCheck (optional) skip inner duplication check
     * @return bool TRUE only if a new record was inserted into the queue table
     */
    public function addUrl(
        $id,
        $url,
        array $subCfg,
        $tstamp,
        $configurationHash = '',
        $skipInnerDuplicationCheck = false
    ) {
        // Creating parameters:
        $parameters = ['url' => $url];

        // fe user group simulation:
        $userGroupUids = array_unique(GeneralUtility::intExplode(',', $subCfg['userGroups'], true));
        $userGroupList = implode(',', $userGroupUids);
        if ($userGroupList) {
            $parameters['feUserGroupList'] = $userGroupList;
        }

        // Setting processing instructions
        $parameters['procInstructions'] = GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']);
        if (is_array($subCfg['procInstrParams.'])) {
            $parameters['procInstrParams'] = $subCfg['procInstrParams.'];
        }

        // Possible TypoScript Template Parents
        $parameters['rootTemplatePid'] = $subCfg['rootTemplatePid'];

        // Compile value array:
        $serializedParameters = serialize($parameters);
        $queueEntry = [
            'page_id' => intval($id),
            'parameters' => $serializedParameters,
            'parameters_hash' => GeneralUtility::shortMD5($serializedParameters),
            'configuration_hash' => $configurationHash,
            'scheduled' => $tstamp,
            'exec_time' => 0,
            'set_id' => intval($this->setID),
            'result_data' => '',
            'configuration' => $subCfg['key'],
        ];

        if ($this->registerQueueEntriesInternallyOnly) {
            // The entry is only registered, not stored to the database.
            $this->queueEntries[] = $queueEntry;

            return false;
        }

        // Check if there is already an equal entry - unless the caller opted out.
        $duplicates = [];
        if (!$skipInnerDuplicationCheck) {
            $duplicates = $this->getDuplicateRowsIfExist($tstamp, $queueEntry);
        }

        if (count($duplicates) > 0) {
            $signalPayload = ['rows' => $duplicates, 'fieldArray' => $queueEntry];

            // The event dispatcher is deprecated since crawler v6.4.0, will be removed in crawler v7.0.0.
            // Please use the Signal instead.
            EventDispatcher::getInstance()->post('duplicateUrlInQueue', $this->setID, $signalPayload);
            SignalSlotUtility::emitSignal(
                __CLASS__,
                SignalSlotUtility::SIGNAL_DUPLICATE_URL_IN_QUEUE,
                $signalPayload
            );

            return false;
        }

        $this->db->exec_INSERTquery('tx_crawler_queue', $queueEntry);
        $signalPayload = ['uid' => $this->db->sql_insert_id(), 'fieldArray' => $queueEntry];

        // The event dispatcher is deprecated since crawler v6.4.0, will be removed in crawler v7.0.0.
        // Please use the Signal instead.
        EventDispatcher::getInstance()->post('urlAddedToQueue', $this->setID, $signalPayload);
        SignalSlotUtility::emitSignal(
            __CLASS__,
            SignalSlotUtility::SIGNAL_URL_ADDED_TO_QUEUE,
            $signalPayload
        );

        return true;
    }
1255
1256
    /**
     * This method determines duplicates for a queue entry with the same parameters and this timestamp.
     * If the timestamp is not in the future, it will check if there is any unprocessed queue entry
     * scheduled up to now (with the extension setting "enableTimeslot" also within +/- 100 seconds around now).
     * If the timestamp is in the future it will check, if the queued entry has exactly the same timestamp
     *
     * Only entries without exec_time and without process_id that belong to the same page and carry
     * the same parameters_hash are considered.
     *
     * @param int $tstamp
     * @param array $fieldArray Queue entry to compare with; "page_id" and "parameters_hash" are read
     *
     * @return array List of the qid values of the duplicates found
     */
    protected function getDuplicateRowsIfExist($tstamp, $fieldArray)
    {
        $rows = [];

        // Both values are concatenated into the WHERE clause, so make sure they are integers.
        $tstamp = (int)$tstamp;
        $currentTime = (int)$this->getCurrentTime();

        if ($tstamp <= $currentTime) {
            // The entry is scheduled with "now"
            if (!empty($this->extensionSettings['enableTimeslot'])) {
                $timeBegin = $currentTime - 100;
                $timeEnd = $currentTime + 100;
                $where = ' ((scheduled BETWEEN ' . $timeBegin . ' AND ' . $timeEnd . ' ) OR scheduled <= ' . $currentTime . ') ';
            } else {
                $where = 'scheduled <= ' . $currentTime;
            }
        } else {
            // An entry with a timestamp in the future needs to have the same schedule time
            $where = 'scheduled = ' . $tstamp;
        }

        $result = $this->db->exec_SELECTgetRows(
            'qid',
            'tx_crawler_queue',
            $where .
            ' AND NOT exec_time' .
            ' AND NOT process_id ' .
            ' AND page_id=' . intval($fieldArray['page_id']) .
            ' AND parameters_hash = ' . $this->db->fullQuoteStr($fieldArray['parameters_hash'], 'tx_crawler_queue')
        );

        if (is_array($result)) {
            foreach ($result as $value) {
                $rows[] = $value['qid'];
            }
        }

        return $rows;
    }
1306
1307
    /**
     * Provides the current system time as Unix timestamp.
     *
     * Other methods of this class read "now" through this method instead of calling time() directly.
     *
     * @return int
     */
    public function getCurrentTime()
    {
        $now = time();

        return $now;
    }
1316
1317
    /************************************
1318
     *
1319
     * URL reading
1320
     *
1321
     ************************************/
1322
1323
    /**
     * Read URL for single queue entry
     *
     * Steps: load the queue record, initialise TSFE for its root template page, emit the
     * preprocess signal, set exec_time to lock the record, execute the request through
     * readUrl_exec(), evaluate the registered "pollSuccess" instructions, emit the postprocess
     * signal and finally store the serialized result in the record.
     *
     * @param integer $queueId
     * @param boolean $force If set, will process even if exec_time has been set!
     * @return integer|null 0, or self::CLI_STATUS_POLLABLE_PROCESSED if a pollable instruction reported success; NULL if no matching queue entry was found
     */
    public function readUrl($queueId, $force = false)
    {
        $ret = 0;
        if ($this->debugMode) {
            GeneralUtility::devlog('crawler-readurl start ' . microtime(true), __FUNCTION__);
        }

        // Get entry:
        $queueRows = $this->db->exec_SELECTgetRows(
            '*',
            'tx_crawler_queue',
            'qid=' . intval($queueId) . ($force ? '' : ' AND exec_time=0 AND process_scheduled > 0')
        );
        // The result is not an array on failure and may be empty, so do not destructure it blindly.
        $queueRec = is_array($queueRows) ? reset($queueRows) : null;

        if (!is_array($queueRec)) {
            return null;
        }

        $parameters = unserialize($queueRec['parameters']);
        if (is_array($parameters) && !empty($parameters['rootTemplatePid'])) {
            $this->initTSFE((int)$parameters['rootTemplatePid']);
        } else {
            GeneralUtility::sysLog(
                'Page with (' . $queueRec['page_id'] . ') could not be crawled, please check your crawler configuration. Perhaps no Root Template Pid is set',
                'crawler',
                GeneralUtility::SYSLOG_SEVERITY_WARNING
            );
        }

        $signalPayload = [$queueId, $queueRec];
        SignalSlotUtility::emitSignal(
            __CLASS__,
            SignalSlotUtility::SIGNAL_QUEUEITEM_PREPROCESS,
            $signalPayload
        );

        // Set exec_time to lock record:
        $field_array = ['exec_time' => $this->getCurrentTime()];

        if (isset($this->processID)) {
            // If multiprocessing is used we need to store the id of the process which has handled this entry
            $field_array['process_id_completed'] = $this->processID;
        }
        $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array);

        $result = $this->readUrl_exec($queueRec);

        // NOTE(review): if "content" can carry data fetched from a remote server, calling unserialize()
        // on it is an object injection risk - confirm against readUrl_exec() and restrict or replace it.
        $resultData = (is_array($result) && isset($result['content'])) ? unserialize($result['content']) : false;

        // At the moment there's no need to point to specific pollable extensions
        $procInstructions = (is_array($resultData) && isset($resultData['parameters']['procInstructions']))
            ? $resultData['parameters']['procInstructions']
            : null;
        if (is_array($procInstructions)
            && isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pollSuccess'])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pollSuccess'])
        ) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pollSuccess'] as $pollable) {
                // Only check the success value if the instruction is running;
                // it is important to name the pollSuccess key same as the procInstructions key
                if (in_array($pollable, $procInstructions) && !empty($resultData['success'][$pollable])) {
                    $ret |= self::CLI_STATUS_POLLABLE_PROCESSED;
                }
            }
        }

        // Set result in log which also denotes the end of the processing of this entry.
        $field_array = ['result_data' => serialize($result)];

        $signalPayload = [$queueId, $field_array];
        SignalSlotUtility::emitSignal(
            __CLASS__,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            $signalPayload
        );

        $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array);

        if ($this->debugMode) {
            GeneralUtility::devlog('crawler-readurl stop ' . microtime(true), __FUNCTION__);
        }

        return $ret;
    }
1412
1413
    /**
     * Read URL for a log entry that has not been inserted into the queue yet.
     *
     * The entry is inserted into tx_crawler_queue first, then executed via
     * readUrl_exec(); finally the serialized result is written back to the new row.
     *
     * @param array $field_array Queue field array
     *
     * @return array|string|bool Whatever readUrl_exec() returned for the entry
     */
    public function readUrlFromArray($field_array)
    {
        // Stamp exec_time before inserting so the record is locked from the start:
        $field_array['exec_time'] = $this->getCurrentTime();
        $this->db->exec_INSERTquery('tx_crawler_queue', $field_array);

        $insertedQueueId = $this->db->sql_insert_id();
        $field_array['qid'] = $insertedQueueId;

        $crawlResult = $this->readUrl_exec($field_array);

        // Storing the result in the log also marks the end of processing for this entry.
        $resultFields = ['result_data' => serialize($crawlResult)];

        $postProcessPayload = [$insertedQueueId, $resultFields];
        SignalSlotUtility::emitSignal(
            __CLASS__,
            SignalSlotUtility::SIGNAL_QUEUEITEM_POSTPROCESS,
            $postProcessPayload
        );

        $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . (int)$insertedQueueId, $resultFields);

        return $crawlResult;
    }
1444
1445
    /**
     * Read URL for a queue record
     *
     * The serialized "parameters" of the record decide what happens: when they carry a
     * "_CALLBACKOBJ" reference, that object's crawler_execute() is called; otherwise
     * the "url" parameter is fetched through requestUrl() and the "urlCrawled"
     * event/signal is emitted.
     *
     * @param array $queueRec Queue record (reads the keys "parameters", "qid" and "set_id")
     * @return array|string|bool The string "ERROR" if the parameters cannot be decoded to an array,
     *                           an array with a "content" key for callbacks, or the result of requestUrl()
     */
    public function readUrl_exec($queueRec)
    {
        // Decode parameters:
        // NOTE(review): unserialize() is applied to data stored in the queue table; confirm that
        // nothing untrusted can write to that column.
        $parameters = unserialize($queueRec['parameters']);
        if (!is_array($parameters)) {
            return 'ERROR';
        }

        // Calling object:
        if (!empty($parameters['_CALLBACKOBJ'])) {
            $objRef = $parameters['_CALLBACKOBJ'];
            // No "=&" here: assigning a function result by reference only raises a notice,
            // and objects are passed around as handles anyway.
            $callBackObj = GeneralUtility::getUserObj($objRef);
            if (!is_object($callBackObj)) {
                return ['content' => 'No object: ' . $objRef];
            }

            unset($parameters['_CALLBACKOBJ']);

            return ['content' => serialize($callBackObj->crawler_execute($parameters, $this))];
        }

        // Regular FE request:

        // Prepare:
        $crawlerId = $queueRec['qid'] . ':' . md5($queueRec['qid'] . '|' . $queueRec['set_id'] . '|' . $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey']);

        // Get result:
        $result = $this->requestUrl($parameters['url'], $crawlerId);

        // The event dispatcher is deprecated since crawler v6.4.0, will be removed in crawler v7.0.0.
        // Please use the Signal instead.
        EventDispatcher::getInstance()->post('urlCrawled', $queueRec['set_id'], ['url' => $parameters['url'], 'result' => $result]);

        $signalPayload = ['url' => $parameters['url'], 'result' => $result];
        SignalSlotUtility::emitSignal(
            __CLASS__,
            SignalSlotUtility::SIGNAL_URL_CRAWLED,
            $signalPayload
        );

        return $result;
    }
1489
1490
    /**
     * Gets the content of a URL.
     *
     * Unless the "makeDirectRequests" extension setting hands the work over to
     * sendDirectRequest(), the request is written to a socket opened with fsockopen()
     * and the response is read back through getHttpResponseFromStream(). With the
     * "follow30x" setting enabled, a redirect announced by the response is followed
     * recursively; the redirecting response is then kept under the "parentRequest" key.
     *
     * @param string $originalUrl URL to read
     * @param string $crawlerId Crawler ID string (qid + hash to verify)
     * @param integer $timeout Timeout handed to fsockopen()
     * @param integer $recursion Recursion limiter for redirects
     * @return array|bool Array with the keys "request", "headers" and "content" (plus "parentRequest"
     *                    after a redirect), the result of sendDirectRequest() for direct requests,
     *                    or FALSE if the URL could not be fetched
     */
    public function requestUrl($originalUrl, $crawlerId, $timeout = 2, $recursion = 10)
    {
        if (!$recursion) {
            return false;
        }

        // Parse URL, checking for scheme:
        $url = parse_url($originalUrl);

        if ($url === false) {
            if (TYPO3_DLOG) {
                GeneralUtility::devLog(sprintf('Could not parse_url() for string "%s"', $originalUrl), 'crawler', 4, ['crawlerId' => $crawlerId]);
            }
            return false;
        }

        // parse_url() only returns the components that are present in the URL. Fill in
        // neutral defaults so none of the lookups below hits an undefined index, and so
        // that a URL without a path is requested as "/".
        $url = array_merge(
            ['scheme' => '', 'host' => '', 'port' => 0, 'user' => '', 'pass' => '', 'path' => '/', 'query' => ''],
            $url
        );

        if (!in_array($url['scheme'], ['', 'http', 'https'], true)) {
            if (TYPO3_DLOG) {
                GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $originalUrl), 'crawler', 4, ['crawlerId' => $crawlerId]);
            }
            return false;
        }

        // direct request
        if (!empty($this->extensionSettings['makeDirectRequests'])) {
            return $this->sendDirectRequest($originalUrl, $crawlerId);
        }

        $reqHeaders = $this->buildRequestHeaderArray($url, $crawlerId);

        // thanks to Pierrick Caillon for adding proxy support
        $rurl = $url;

        if (!empty($this->extensionSettings['curlUse']) && !empty($this->extensionSettings['curlProxyServer'])) {
            // Connect to the proxy instead and put the absolute URL into the request line.
            $rurl = parse_url($this->extensionSettings['curlProxyServer']);
            $url['path'] = $url['scheme'] . '://' . $url['host'] . ($url['port'] > 0 ? ':' . $url['port'] : '') . $url['path'];
            $reqHeaders = $this->buildRequestHeaderArray($url, $crawlerId);
        }

        // $rurl may be FALSE or incomplete if the proxy setting cannot be parsed.
        $host = isset($rurl['host']) ? $rurl['host'] : '';
        $port = isset($rurl['port']) ? (int)$rurl['port'] : 0;

        if ($url['scheme'] === 'https') {
            $host = 'ssl://' . $host;
            $defaultPort = 443;
        } else {
            $defaultPort = 80;
        }
        if ($port <= 0) {
            $port = $defaultPort;
        }

        $startTime = microtime(true);
        $fp = fsockopen($host, $port, $errno, $errstr, $timeout);

        if (!$fp) {
            if (TYPO3_DLOG) {
                GeneralUtility::devLog(sprintf('Error while opening "%s"', $originalUrl), 'crawler', 4, ['crawlerId' => $crawlerId]);
            }
            return false;
        }

        // Request message:
        $msg = implode("\r\n", $reqHeaders) . "\r\n\r\n";
        fputs($fp, $msg);

        // Read response:
        $d = $this->getHttpResponseFromStream($fp);
        fclose($fp);

        $time = microtime(true) - $startTime;
        $this->log($originalUrl . ' ' . $time);

        // Implode content and headers:
        $result = [
            'request' => $msg,
            'headers' => implode('', $d['headers']),
            'content' => implode('', (array)$d['content'])
        ];

        if (!empty($this->extensionSettings['follow30x'])
            && ($newUrl = $this->getRequestUrlFrom302Header($d['headers'], $url['user'], $url['pass']))
        ) {
            // Follow the redirect exactly once per hop, with one recursion step less.
            $newRequestUrl = $this->requestUrl($newUrl, $crawlerId, $timeout, $recursion - 1);

            if (!is_array($newRequestUrl)) {
                if (TYPO3_DLOG) {
                    GeneralUtility::devLog(sprintf('Error while opening "%s"', $newUrl), 'crawler', 4, ['crawlerId' => $crawlerId]);
                }
                return false;
            }

            $result = array_merge(['parentRequest' => $result], $newRequestUrl);
        }

        return $result;
    }
1592
1593
    /**
     * Gets the base path of the website frontend.
     * (e.g. if you call http://mydomain.com/cms/index.php in
     * the browser the base path is "/cms/")
     *
     * Sources, in order of precedence: the "frontendBasePath" extension setting,
     * $GLOBALS['TSFE']->absRefPrefix and, outside of CLI mode, TYPO3_SITE_PATH.
     * If none applies the base path is "/".
     *
     * @return string Base path of the website frontend
     */
    protected function getFrontendBasePath()
    {
        if (!empty($this->extensionSettings['frontendBasePath'])) {
            // Explicit extension setting wins:
            $basePath = $this->extensionSettings['frontendBasePath'];
        } elseif (!empty($GLOBALS['TSFE']->absRefPrefix)) {
            // Otherwise try config.absRefPrefix:
            $basePath = $GLOBALS['TSFE']->absRefPrefix;
        } elseif (!defined('TYPO3_REQUESTTYPE_CLI') || !TYPO3_REQUESTTYPE_CLI) {
            // If not in CLI mode the base path can be determined from $_SERVER environment:
            $basePath = GeneralUtility::getIndpEnv('TYPO3_SITE_PATH');
        } else {
            $basePath = '/';
        }

        if ($basePath == '/') {
            return $basePath;
        }

        // Base path must be '/<pathSegments>/':
        return rtrim('/' . ltrim($basePath, '/'), '/') . '/';
    }
1623
1624
    /**
     * Runs a shell command through shell_exec() and hands back what it printed.
     *
     * @param string $command Shell command to be executed
     * @return string Outputted result of the command execution
     */
    protected function executeShellCommand($command)
    {
        return shell_exec($command);
    }
1635
1636
    /**
     * Reads HTTP response from the given stream.
     *
     * Lines are read until the first blank line and stored (trimmed) as headers;
     * everything after that is stored untouched as content. If the argument is not
     * a resource, the empty structure is returned.
     *
     * @param  resource $streamPointer  Pointer to connection stream.
     * @return array                    Associative array with the following items:
     *                                  headers <array> Response headers sent by server.
     *                                  content <array> Content, with each line as an array item.
     */
    protected function getHttpResponseFromStream($streamPointer)
    {
        $response = ['headers' => [], 'content' => []];

        if (!is_resource($streamPointer)) {
            return $response;
        }

        // read headers
        // Compare against FALSE explicitly: a line such as "0" is falsy but still data.
        while (($line = fgets($streamPointer, 2048)) !== false) {
            $line = trim($line);
            if ($line === '') {
                // blank line separates headers from content
                break;
            }
            $response['headers'][] = $line;
        }

        // read content
        while (($line = fgets($streamPointer, 2048)) !== false) {
            $response['content'][] = $line;
        }

        return $response;
    }
1667
1668
    /**
     * Appends a timestamped message to the file named by the "logFileName"
     * extension setting; does nothing when that setting is empty.
     *
     * @param string $message Text to append to the log file
     * @return void
     */
    protected function log($message)
    {
        if (empty($this->extensionSettings['logFileName'])) {
            return;
        }

        $bytesWritten = @file_put_contents(
            $this->extensionSettings['logFileName'],
            date('Ymd His') . ' ' . $message . PHP_EOL,
            FILE_APPEND
        );

        if ($bytesWritten) {
            return;
        }

        // Writing failed: report it through the developer log instead.
        GeneralUtility::devLog('File "' . $this->extensionSettings['logFileName'] . '" could not be written, please check file permissions.', 'crawler', LogLevel::INFO);
    }
1680
1681
    /**
     * Builds HTTP request headers.
     *
     * Produces the request line and header lines of a HTTP/1.0 GET request. A preview
     * cookie is added when the query string contains "ADMCMD_previewWS", and a Basic
     * Authorization header when the URL carries a user name.
     *
     * @param array $url URL components as returned by parse_url(); absent components are allowed
     * @param string $crawlerId Value of the X-T3crawler header
     *
     * @return array List of header lines (without line endings)
     */
    protected function buildRequestHeaderArray(array $url, $crawlerId)
    {
        // parse_url() leaves out components that are not part of the URL, so read them defensively.
        $path = (isset($url['path']) && $url['path'] !== '') ? $url['path'] : '/';
        $query = isset($url['query']) ? (string)$url['query'] : '';
        $host = isset($url['host']) ? $url['host'] : '';
        $user = isset($url['user']) ? (string)$url['user'] : '';
        $pass = isset($url['pass']) ? $url['pass'] : '';

        $reqHeaders = [];
        $reqHeaders[] = 'GET ' . $path . ($query !== '' ? '?' . $query : '') . ' HTTP/1.0';
        $reqHeaders[] = 'Host: ' . $host;
        if (stripos($query, 'ADMCMD_previewWS') !== false) {
            $reqHeaders[] = 'Cookie: $Version="1"; be_typo_user="1"; $Path=/';
        }
        $reqHeaders[] = 'Connection: close';
        if ($user !== '') {
            $reqHeaders[] = 'Authorization: Basic ' . base64_encode($user . ':' . $pass);
        }
        $reqHeaders[] = 'X-T3crawler: ' . $crawlerId;
        $reqHeaders[] = 'User-Agent: TYPO3 crawler';

        return $reqHeaders;
    }
1705
1706
    /**
     * Check if the submitted HTTP-Header contains a redirect location and built new crawler-url
     *
     * Only responses whose status line contains "301 Moved", "302 Found" or "302 Moved"
     * are treated as redirects. When a user name is given, the credentials are injected
     * into the redirect target (scheme, host, port, path and query are kept).
     *
     * @param array $headers HTTP Header lines, status line first
     * @param string $user HTTP Auth. User
     * @param string $pass HTTP Auth. Password
     * @return bool|string Redirect URL, or FALSE if there is no usable redirect
     */
    protected function getRequestUrlFrom302Header($headers, $user = '', $pass = '')
    {
        if (!is_array($headers) || !isset($headers[0])) {
            return false;
        }

        $statusLine = $headers[0];
        $isRedirect = stripos($statusLine, '301 Moved') !== false
            || stripos($statusLine, '302 Found') !== false
            || stripos($statusLine, '302 Moved') !== false;
        if (!$isRedirect) {
            return false;
        }

        // Find the first Location header. Split at the first colon only, so a value that
        // itself contains ": " stays intact, and compare the name case-insensitively.
        $location = null;
        foreach ($headers as $headerLine) {
            $parts = explode(':', $headerLine, 2);
            if (count($parts) === 2 && strcasecmp(trim($parts[0]), 'Location') === 0) {
                $location = trim($parts[1]);
                break;
            }
        }
        if ($location === null) {
            return false;
        }

        if ($user == '') {
            return $location;
        }

        // Rebuild the target with the credentials in front of the host.
        $target = parse_url($location);
        if (!$target) {
            return false;
        }

        $scheme = isset($target['scheme']) ? $target['scheme'] : '';
        $host = isset($target['host']) ? $target['host'] : '';
        $port = isset($target['port']) ? ':' . $target['port'] : '';
        $path = isset($target['path']) ? $target['path'] : '';

        $newUrl = $scheme . '://' . $user . ':' . $pass . '@' . $host . $port . $path;
        if (isset($target['query']) && $target['query'] != '') {
            $newUrl .= '?' . $target['query'];
        }

        return $newUrl;
    }
1748
1749
    /**************************
1750
     *
1751
     * tslib_fe hooks:
1752
     *
1753
     **************************/
1754
1755
    /**
     * Initialization hook (called after database connection)
     * Takes the "HTTP_X_T3CRAWLER" header and looks up queue record and verifies if the session comes from the system (by comparing hashes)
     *
     * The header is expected as "<qid>:<hash>". Without the header nothing happens;
     * with a header that does not match a queue record the script is terminated.
     *
     * @param array $params Parameters from frontend; on success $params['pObj']->applicationData['tx_crawler'] is filled
     * @param object $ref TSFE object (reference under PHP5), unused
     * @return void
     *
     * FIXME: Look like this is not used, in commit 9910d3f40cce15f4e9b7bcd0488bf21f31d53ebc it's added as public,
     * FIXME: I think this can be removed. (TNM)
     */
    public function fe_init(&$params, $ref)
    {
        // Authenticate crawler request:
        if (!isset($_SERVER['HTTP_X_T3CRAWLER'])) {
            return;
        }

        // Pad the parts so a header without ":" yields an empty hash instead of an undefined offset.
        list($queueId, $hash) = array_pad(explode(':', $_SERVER['HTTP_X_T3CRAWLER']), 2, '');

        // exec_SELECTgetSingleRow() hands back the row itself, so it must not be
        // unpacked with list() (that would look for a numeric index 0 in the row).
        $queueRec = $this->db->exec_SELECTgetSingleRow('*', 'tx_crawler_queue', 'qid=' . (int)$queueId);

        // If a crawler record was found and hash was matching, set it up:
        if (is_array($queueRec) && $hash === md5($queueRec['qid'] . '|' . $queueRec['set_id'] . '|' . $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'])) {
            $params['pObj']->applicationData['tx_crawler']['running'] = true;
            $params['pObj']->applicationData['tx_crawler']['parameters'] = unserialize($queueRec['parameters']);
            $params['pObj']->applicationData['tx_crawler']['log'] = [];
        } else {
            die('No crawler entry found!');
        }
    }
1783
1784
    /*****************************
1785
     *
1786
     * Compiling URLs to crawl - tools
1787
     *
1788
     *****************************/
1789
1790
    /**
     * Walks the page tree below a root page and compiles the crawler rows for every page.
     *
     * The arguments are stored on the controller and picked up by
     * drawURLs_addRowsForPage(), which is called for each page of the tree. For pages
     * with doktype 7 (mount points) the mounted tree is traversed as well.
     *
     * @param integer $id Root page id to start from.
     * @param integer $depth Depth of tree, 0=only id-page, 1= on sublevel, 99 = infinite
     * @param integer $scheduledTime Unix Time when the URL is timed to be visited when put in queue
     * @param integer $reqMinute Number of requests per minute (creates the interleave between requests)
     * @param boolean $submitCrawlUrls If set, submits the URLs to queue in database (real crawling)
     * @param boolean $downloadCrawlUrls If set (and submitcrawlUrls is false) will fill $downloadUrls with entries)
     * @param array $incomingProcInstructions Array of processing instructions
     * @param array $configurationSelection Array of configuration keys
     * @return string Concatenated output of drawURLs_addRowsForPage() for all visited pages
     */
    public function getPageTreeAndUrls(
        $id,
        $depth,
        $scheduledTime,
        $reqMinute,
        $submitCrawlUrls,
        $downloadCrawlUrls,
        array $incomingProcInstructions,
        array $configurationSelection
    ) {
        global $LANG;
        if (!is_object($LANG)) {
            $LANG = GeneralUtility::makeInstance(LanguageService::class);
            $LANG->init(0);
        }
        $this->scheduledTime = $scheduledTime;
        $this->reqMinute = $reqMinute;
        $this->submitCrawlUrls = $submitCrawlUrls;
        $this->downloadCrawlUrls = $downloadCrawlUrls;
        $this->incomingProcInstructions = $incomingProcInstructions;
        $this->incomingConfigurationSelection = $configurationSelection;

        $this->duplicateTrack = [];
        $this->downloadUrls = [];

        // Drawing tree:
        /* @var PageTreeView $tree */
        $tree = GeneralUtility::makeInstance(PageTreeView::class);
        $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
        $tree->init('AND ' . $perms_clause);

        $pageInfo = BackendUtility::readPageAccess($id, $perms_clause);
        if (is_array($pageInfo)) {
            // Set root row:
            $tree->tree[] = [
                'row' => $pageInfo,
                'HTML' => IconUtility::getIconForRecord('pages', $pageInfo)
            ];
        }

        // Get branch beneath:
        if ($depth) {
            $tree->getTree($id, $depth, '');
        }

        // Traverse page tree:
        $code = '';

        foreach ($tree->tree as $data) {
            $this->MP = false;

            // recognize mount points
            if ($data['row']['doktype'] == 7) {
                // The uid goes straight into the WHERE clause, so force it to an integer.
                $mountpage = $this->db->exec_SELECTgetRows('*', 'pages', 'uid = ' . (int)$data['row']['uid']);

                // fetch mounted pages
                $this->MP = $mountpage[0]['mount_pid'] . '-' . $data['row']['uid'];

                $mountTree = GeneralUtility::makeInstance(PageTreeView::class);
                $mountTree->init('AND ' . $perms_clause);
                $mountTree->getTree($mountpage[0]['mount_pid'], $depth, '');

                foreach ($mountTree->tree as $mountData) {
                    $code .= $this->drawURLs_addRowsForPage(
                        $mountData['row'],
                        $mountData['HTML'] . BackendUtility::getRecordTitle('pages', $mountData['row'], true)
                    );
                }

                // replace page when mount_pid_ol is enabled
                if ($mountpage[0]['mount_pid_ol']) {
                    $data['row']['uid'] = $mountpage[0]['mount_pid'];
                } else {
                    // if the mount_pid_ol is not set the MP must not be used for the mountpoint page
                    $this->MP = false;
                }
            }

            $code .= $this->drawURLs_addRowsForPage(
                $data['row'],
                $data['HTML'] . BackendUtility::getRecordTitle('pages', $data['row'], true)
            );
        }

        return $code;
    }
1888
1889
    /**
     * Expands exclude string
     *
     * The string is a comma separated list of parts in the form "<pid>" or
     * "<pid>+<depth>". Each pid is added to the result; for a depth greater than 0 the
     * uids of the pages found by the page tree below that pid are added as well. A "+"
     * without a depth behind it is treated as depth 99. Results are cached per
     * exclude string in a static variable.
     *
     * @param string $excludeString Exclude string
     * @return array List of unique page ids
     */
    public function expandExcludeString($excludeString)
    {
        // internal static caches;
        static $expandedExcludeStringCache = [];
        static $treeCache = [];

        // isset() instead of empty(): an expansion that yields no pids is a valid cache entry too.
        $cacheKey = (string)$excludeString;
        if (isset($expandedExcludeStringCache[$cacheKey])) {
            return $expandedExcludeStringCache[$cacheKey];
        }

        $pidList = [];

        if (!empty($excludeString)) {
            /** @var PageTreeView $tree */
            $tree = GeneralUtility::makeInstance(PageTreeView::class);
            $tree->init('AND ' . $this->backendUser->getPagePermsClause(1));

            $excludeParts = GeneralUtility::trimExplode(',', $excludeString);

            foreach ($excludeParts as $excludePart) {
                // Pad to two elements so a part without "+" does not trigger an undefined offset.
                list($pid, $depth) = array_pad(GeneralUtility::trimExplode('+', $excludePart), 2, '');

                // default is "page only" = "depth=0"
                if (empty($depth)) {
                    $depth = (strpos($excludePart, '+') !== false) ? 99 : 0;
                }

                $pidList[] = $pid;

                if ($depth > 0) {
                    if (empty($treeCache[$pid][$depth])) {
                        $tree->reset();
                        $tree->getTree($pid, $depth);
                        $treeCache[$pid][$depth] = $tree->tree;
                    }

                    foreach ($treeCache[$pid][$depth] as $data) {
                        $pidList[] = $data['row']['uid'];
                    }
                }
            }
        }

        $expandedExcludeStringCache[$cacheKey] = array_unique($pidList);

        return $expandedExcludeStringCache[$cacheKey];
    }
1940
1941
    /**
     * Builds the table rows shown for one page of the page tree.
     * Every crawler configuration of the page gets its own row listing the
     * GET variable configuration; a page without configurations gets a single
     * "No entries" row.
     *
     * @param array $pageRow Page row
     * @param string $pageTitleAndIcon Page icon and title for row
     * @return string HTML <tr> content (one or more)
     */
    public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon)
    {
        $skipMessage = '';

        // Get list of configurations
        $configurations = $this->getUrlsForPageRow($pageRow, $skipMessage);

        // remove configuration that does not match the current selection
        if (count($this->incomingConfigurationSelection) > 0) {
            foreach ($configurations as $configurationKey => $unusedConfiguration) {
                if (!in_array($configurationKey, $this->incomingConfigurationSelection)) {
                    unset($configurations[$configurationKey]);
                }
            }
        }

        // Nothing configured for this page: one informational row is all there is.
        if (!count($configurations)) {
            $message = !empty($skipMessage) ? ' (' . $skipMessage . ')' : '';

            return '
                <tr class="bgColor-20" style="border-bottom: 1px solid black;">
                    <td>' . $pageTitleAndIcon . '</td>
                    <td colspan="6"><em>No entries</em>' . $message . '</td>
                </tr>';
        }

        // Traverse parameter combinations:
        $content = '';
        $rowNumber = 0;
        $rowSpan = count($configurations);
        $optionLabels = [
            'userGroups' => 'User Groups: ',
            'baseUrl' => 'Base Url: ',
            'procInstrFilter' => 'ProcInstr: ',
        ];

        foreach ($configurations as $confKey => $confArray) {
            // Title column, only on the first row (it spans all rows of the page):
            $titleCell = $rowNumber === 0
                ? '<td rowspan="' . $rowSpan . '">' . $pageTitleAndIcon . '</td>'
                : '';
            $stripe = $rowNumber % 2 ? '-20' : '-10';
            $rowNumber++;

            $excludedPids = $this->expandExcludeString($confArray['subCfg']['exclude']);
            if (in_array($pageRow['uid'], $excludedPids)) {
                $content .= '<tr class="bgColor' . $stripe . '">
                            ' . $titleCell . '
                            <td>' . htmlspecialchars($confKey) . '</td>
                            <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td>
                        </tr>';
                continue;
            }

            // URL list:
            $urlList = $this->urlListFromUrlArray(
                $confArray,
                $pageRow,
                $this->scheduledTime,
                $this->reqMinute,
                $this->submitCrawlUrls,
                $this->downloadCrawlUrls,
                $this->duplicateTrack,
                $this->downloadUrls,
                $this->incomingProcInstructions // if empty the urls won't be filtered by processing instructions
            );

            // Expanded parameters:
            $expandedRows = '';
            $valueCounts = [];
            foreach ($confArray['paramExpanded'] as $gVar => $gVal) {
                $valueCount = count($gVal);
                $expandedRows .= '
                            <tr>
                                <td class="bgColor4-20">' . htmlspecialchars('&' . $gVar . '=') . '<br/>' .
                                                '(' . $valueCount . ')' .
                                                '</td>
                                <td class="bgColor4" nowrap="nowrap">' . nl2br(htmlspecialchars(implode(chr(10), $gVal))) . '</td>
                            </tr>
                        ';
                $valueCounts[] = $valueCount;
            }
            $paramExpanded = '<table class="lrPadding c-list param-expanded">' . $expandedRows . '</table>';
            $paramExpanded .= 'Comb: ' . implode('*', $valueCounts) . '=' . array_product($valueCounts);

            // Options
            $optionValues = '';
            foreach ($optionLabels as $subCfgKey => $label) {
                if ($confArray['subCfg'][$subCfgKey]) {
                    $optionValues .= $label . $confArray['subCfg'][$subCfgKey] . '<br/>';
                }
            }

            // Compile row:
            $content .= '
                        <tr class="bgColor' . $stripe . '">
                            ' . $titleCell . '
                            <td>' . htmlspecialchars($confKey) . '</td>
                            <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10) . '&', GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))) . '</td>
                            <td>' . $paramExpanded . '</td>
                            <td nowrap="nowrap">' . $urlList . '</td>
                            <td nowrap="nowrap">' . $optionValues . '</td>
                            <td nowrap="nowrap">' . DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']) . '</td>
                        </tr>';
        }

        return $content;
    }
2058
2059
    /*****************************
2060
     *
2061
     * CLI functions
2062
     *
2063
     *****************************/
2064
2065
    /**
     * Main function for running from Command Line PHP script (cron job)
     * See ext/crawler/cli/crawler_cli.phpsh for details
     *
     * When -h or --help is passed the help text is printed and the script
     * terminates. Otherwise a process slot is acquired, one batch of queue
     * entries is worked off through CLI_run() and the slot is released again.
     *
     * @return int Status flags (self::CLI_STATUS_* constants combined with bitwise OR)
     */
    public function CLI_main()
    {
        $this->setAccessMode('cli');
        $result = self::CLI_STATUS_NOTHING_PROCCESSED;
        $cliObj = GeneralUtility::makeInstance(CrawlerCommandLineController::class);

        if (isset($cliObj->cli_args['-h']) || isset($cliObj->cli_args['--help'])) {
            $cliObj->cli_validateArgs();
            $cliObj->cli_help();
            exit;
        }

        if (!$this->getDisabled() && $this->CLI_checkAndAcquireNewProcess($this->CLI_buildProcessId())) {
            // Command line values take precedence over the extension settings.
            $countInARun = $cliObj->cli_argValue('--countInARun') ? intval($cliObj->cli_argValue('--countInARun')) : $this->extensionSettings['countInARun'];
            // Seconds
            $sleepAfterFinish = $cliObj->cli_argValue('--sleepAfterFinish') ? intval($cliObj->cli_argValue('--sleepAfterFinish')) : $this->extensionSettings['sleepAfterFinish'];
            // NOTE(review): historically labelled "Milliseconds", but CLI_run() hands this
            // value to usleep(), which takes microseconds - confirm the intended unit.
            $sleepTime = $cliObj->cli_argValue('--sleepTime') ? intval($cliObj->cli_argValue('--sleepTime')) : $this->extensionSettings['sleepTime'];

            try {
                // Run process:
                $result = $this->CLI_run($countInARun, $sleepTime, $sleepAfterFinish);
            } catch (\Exception $e) {
                $this->CLI_debug(get_class($e) . ': ' . $e->getMessage());
                $result = self::CLI_STATUS_ABORTED;
            }

            // Cleanup
            $this->db->exec_DELETEquery('tx_crawler_process', 'assigned_items_count = 0');

            //TODO can't we do that in a clean way?
            $this->CLI_releaseProcesses($this->CLI_buildProcessId());

            // Query the remaining items once so the debug output and the status flag agree.
            $remainingItems = $this->queueRepository->countUnprocessedItems();
            $this->CLI_debug("Unprocessed Items remaining:" . $remainingItems . " (" . $this->CLI_buildProcessId() . ")");
            $result |= ($remainingItems > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED);
        } else {
            // Crawler disabled or no free process slot.
            $result |= self::CLI_STATUS_ABORTED;
        }

        return $result;
    }
2112
2113
    /**
     * Function executed by crawler_im.php cli script.
     *
     * Reads the start page and the configuration keys from the command line,
     * collects the URLs of the page tree and then acts according to the -o
     * option: "url" prints the download URLs, "queue" reports the queued
     * entries, "exec" requests every entry right away, and any other value
     * only lists the URLs that were found.
     *
     * @return void
     */
    public function CLI_main_im()
    {
        $this->setAccessMode('cli_im');

        $cliObj = GeneralUtility::makeInstance(QueueCommandLineController::class);

        // Force user to admin state and set workspace to "Live":
        $this->backendUser->user['admin'] = 1;
        $this->backendUser->setWorkspace(0);

        // Print help
        if (!isset($cliObj->cli_args['_DEFAULT'][1])) {
            $cliObj->cli_validateArgs();
            $cliObj->cli_help();
            exit;
        }

        $cliObj->cli_validateArgs();

        // The -o option is consulted several times below; look it up once.
        $outputMode = $cliObj->cli_argValue('-o');
        $submitToQueue = ($outputMode === 'queue' || $outputMode === 'exec');
        $listDownloadUrls = ($outputMode === 'url');
        $lineFeed = chr(10);

        if ($outputMode === 'exec') {
            $this->registerQueueEntriesInternallyOnly = true;
        }

        // Argument 2 is present when the crawler is called over TYPO3 BE,
        // otherwise (called over cli) the page id is argument 1.
        $pageIdArgument = isset($cliObj->cli_args['_DEFAULT'][2]) ? 2 : 1;
        $pageId = MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][$pageIdArgument], 0);

        $configurationKeys = $this->getConfigurationKeys($cliObj);

        if (!is_array($configurationKeys)) {
            $configurations = $this->getUrlsForPageId($pageId);
            $configurationKeys = is_array($configurations) ? array_keys($configurations) : [];
        }

        if ($submitToQueue) {
            $reason = new Reason();
            $reason->setReason(Reason::REASON_GUI_SUBMIT);
            $reason->setDetailText('The cli script of the crawler added to the queue');

            // The event dispatcher is deprecated since crawler v6.4.0, will be removed in crawler v7.0.0.
            // Please use the Signal instead.
            EventDispatcher::getInstance()->post(
                'invokeQueueChange',
                $this->setID,
                ['reason' => $reason]
            );

            $signalPayload = ['reason' => $reason];
            SignalSlotUtility::emitSignal(
                __CLASS__,
                SignalSlotUtility::SIGNAL_INVOKE_QUEUE_CHANGE,
                $signalPayload
            );
        }

        if ($this->extensionSettings['cleanUpOldQueueEntries']) {
            $this->cleanUpOldQueueEntries();
        }

        $this->setID = (int) GeneralUtility::md5int(microtime());
        $this->getPageTreeAndUrls(
            $pageId,
            MathUtility::forceIntegerInRange($cliObj->cli_argValue('-d'), 0, 99),
            $this->getCurrentTime(),
            MathUtility::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30, 1, 1000),
            $submitToQueue,
            $listDownloadUrls,
            GeneralUtility::trimExplode(',', $cliObj->cli_argValue('-proc'), true),
            $configurationKeys
        );

        if ($listDownloadUrls) {
            $cliObj->cli_echo(implode($lineFeed, $this->downloadUrls) . $lineFeed, true);
            return;
        }

        if ($outputMode === 'exec') {
            $cliObj->cli_echo("Executing " . count($this->urlList) . " requests right away:\n\n");
            $cliObj->cli_echo(implode($lineFeed, $this->urlList) . $lineFeed);
            $cliObj->cli_echo("\nProcessing:\n");

            foreach ($this->queueEntries as $queueRec) {
                $parameters = unserialize($queueRec['parameters']);
                $cliObj->cli_echo($parameters['url'] . ' (' . implode(',', $parameters['procInstructions']) . ') => ');

                $result = $this->readUrlFromArray($queueRec);

                $requestResult = unserialize($result['content']);
                if (!is_array($requestResult)) {
                    $cliObj->cli_echo('Error checking Crawler Result: ' . substr(preg_replace('/\s+/', ' ', strip_tags($result['content'])), 0, 30000) . '...' . $lineFeed);
                    continue;
                }

                // Each log line is put on its own line, indented by two tabs.
                $logIndent = $lineFeed . chr(9) . chr(9);
                $resLog = is_array($requestResult['log']) ? $logIndent . implode($logIndent, $requestResult['log']) : '';
                $cliObj->cli_echo('OK: ' . $resLog . $lineFeed);
            }
            return;
        }

        if ($outputMode === 'queue') {
            $cliObj->cli_echo("Putting " . count($this->urlList) . " entries in queue:\n\n");
            $cliObj->cli_echo(implode($lineFeed, $this->urlList) . $lineFeed);
            return;
        }

        $cliObj->cli_echo(count($this->urlList) . " entries found for processing. (Use -o to decide action):\n\n", true);
        $cliObj->cli_echo(implode($lineFeed, $this->urlList) . $lineFeed, true);
    }
2227
2228
    /**
     * Entry point of the flush command (runs with access mode "cli_flush").
     *
     * The first default argument is the page id; a page id of 0 sets the
     * "full flush" flag. The -o option selects what is passed on as filter to
     * getLogEntriesForPageId(): "all" (empty filter), "finished" or "pending".
     * Any other value prints the help text.
     *
     * @return bool false if -o held no supported mode or getLogEntriesForPageId() returned false
     */
    public function CLI_main_flush()
    {
        $this->setAccessMode('cli_flush');
        $cliObj = GeneralUtility::makeInstance(FlushCommandLineController::class);

        // Force user to admin state and set workspace to "Live":
        $this->backendUser->user['admin'] = 1;
        $this->backendUser->setWorkspace(0);

        // Print help
        if (!isset($cliObj->cli_args['_DEFAULT'][1])) {
            $cliObj->cli_validateArgs();
            $cliObj->cli_help();
            exit;
        }

        $cliObj->cli_validateArgs();
        $pageId = MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1], 0);
        $fullFlush = ($pageId == 0);

        $mode = $cliObj->cli_argValue('-o');

        // Loose comparisons on purpose: they mirror the semantics of a switch statement.
        if ($mode == 'all') {
            $flushResult = $this->getLogEntriesForPageId($pageId, '', true, $fullFlush);
        } elseif ($mode == 'finished' || $mode == 'pending') {
            $flushResult = $this->getLogEntriesForPageId($pageId, $mode, true, $fullFlush);
        } else {
            $cliObj->cli_validateArgs();
            $cliObj->cli_help();
            $flushResult = false;
        }

        return $flushResult !== false;
    }
2271
2272
    /**
     * Reads the comma separated list given with the -conf CLI argument.
     *
     * @param QueueCommandLineController $cliObj
     * @return array the trimmed list entries, or an empty array if -conf is empty
     *
     * @deprecated since crawler v6.3.0, will be removed in crawler v7.0.0.
     */
    protected function getConfigurationKeys(QueueCommandLineController $cliObj)
    {
        $parameter = trim($cliObj->cli_argValue('-conf'));

        if ($parameter == '') {
            return [];
        }

        return GeneralUtility::trimExplode(',', $parameter);
    }
2285
2286
    /**
     * Running the functionality of the CLI (crawling URLs from queue)
     *
     * Runs the CLI hooks, purges old executed queue entries (if "purgeQueueDays"
     * is set), reserves up to $countInARun due queue entries for this process
     * and reads them one after another.
     *
     * @param int $countInARun      Maximum number of queue entries to fetch
     * @param int $sleepTime        Pause after each processed entry, passed to usleep()
     * @param int $sleepAfterFinish Pause after the batch, passed to sleep()
     * @return int Status flags (self::CLI_STATUS_* constants combined with bitwise OR)
     */
    public function CLI_run($countInARun, $sleepTime, $sleepAfterFinish)
    {
        $result = 0;
        $counter = 0;

        // First, run hooks:
        $this->CLI_runHooks();

        // Clean up the queue
        if (intval($this->extensionSettings['purgeQueueDays']) > 0) {
            $purgeDate = $this->getCurrentTime() - 24 * 60 * 60 * intval($this->extensionSettings['purgeQueueDays']);
            $del = $this->db->exec_DELETEquery(
                'tx_crawler_queue',
                'exec_time!=0 AND exec_time<' . $purgeDate
            );
            if (false == $del) {
                GeneralUtility::devLog('Records could not be deleted.', 'crawler', LogLevel::INFO);
            }
        }

        // Select entries:
        //TODO Shouldn't this reside within the transaction?
        $rows = $this->db->exec_SELECTgetRows(
            'qid,scheduled',
            'tx_crawler_queue',
            'exec_time=0
                AND process_scheduled= 0
                AND scheduled<=' . $this->getCurrentTime(),
            '',
            'scheduled, qid',
            intval($countInARun)
        );

        // The select may not yield an array (e.g. on a query error); treat that like
        // an empty queue instead of counting / iterating a non-array.
        if (!is_array($rows) || count($rows) === 0) {
            $this->CLI_debug("Nothing within queue which needs to be processed (" . $this->CLI_buildProcessId() . ")");

            return $result;
        }

        $quidList = [];
        foreach ($rows as $r) {
            // qids end up in an IN (...) list below, so make sure they are plain integers
            $quidList[] = intval($r['qid']);
        }

        $processId = $this->CLI_buildProcessId();

        //reserve queue entries for process
        $this->db->sql_query('BEGIN');
        //TODO make sure we're not taking assigned queue-entires
        $this->db->exec_UPDATEquery(
            'tx_crawler_queue',
            'qid IN (' . implode(',', $quidList) . ')',
            [
                'process_scheduled' => intval($this->getCurrentTime()),
                'process_id' => $processId
            ]
        );

        //save the number of assigned queue entrys to determine who many have been processed later
        $numberOfAffectedRows = $this->db->sql_affected_rows();
        $this->db->exec_UPDATEquery(
            'tx_crawler_process',
            "process_id = '" . $processId . "'",
            [
                'assigned_items_count' => intval($numberOfAffectedRows)
            ]
        );

        // Only keep the reservation if every selected entry could be claimed.
        if ($numberOfAffectedRows == count($quidList)) {
            $this->db->sql_query('COMMIT');
        } else {
            $this->db->sql_query('ROLLBACK');
            $this->CLI_debug("Nothing processed due to multi-process collision (" . $this->CLI_buildProcessId() . ")");
            return ($result | self::CLI_STATUS_ABORTED);
        }

        foreach ($rows as $r) {
            $result |= $this->readUrl($r['qid']);

            $counter++;
            usleep(intval($sleepTime)); // Just to relax the system

            // if during the start and the current read url the cli has been disable we need to return from the function
            // mark the process NOT as ended.
            if ($this->getDisabled()) {
                return ($result | self::CLI_STATUS_ABORTED);
            }

            if (!$this->CLI_checkIfProcessIsActive($this->CLI_buildProcessId())) {
                $this->CLI_debug("conflict / timeout (" . $this->CLI_buildProcessId() . ")");

                //TODO might need an additional returncode
                $result |= self::CLI_STATUS_ABORTED;
                break; //possible timeout
            }
        }

        sleep(intval($sleepAfterFinish));

        $msg = 'Rows: ' . $counter;
        $this->CLI_debug($msg . " (" . $this->CLI_buildProcessId() . ")");

        if ($counter > 0) {
            $result |= self::CLI_STATUS_PROCESSED;
        }

        return $result;
    }
2401
2402
    /**
     * Activate hooks
     *
     * Instantiates every object reference registered in
     * $TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'] and calls its
     * crawler_init() method with this controller as argument.
     *
     * @return void
     */
    public function CLI_runHooks()
    {
        // isset() keeps installations without registered hooks free of "undefined index" notices.
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'])
        ) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['cli_hooks'] as $objRef) {
            // Plain assignment: binding a function result by reference only raises a notice.
            $hookObj = GeneralUtility::getUserObj($objRef);
            if (is_object($hookObj)) {
                $hookObj->crawler_init($this);
            }
        }
    }
2419
2420
    /**
     * Try to acquire a new process with the given id
     * also performs some auto-cleanup for orphan processes
     * @todo preemption might not be the most elegant way to clean up
     *
     * Active processes whose ttl lies in the past count as orphans: they are
     * released and do not count against the "processLimit" setting.
     *
     * @param string $id identification string for the process
     * @return boolean true if a process record was created, false if the limit is reached
     *                 or no system process id is available
     */
    public function CLI_checkAndAcquireNewProcess($id)
    {
        $systemProcessId = getmypid();
        if ($systemProcessId < 1) {
            return false;
        }

        $runningProcesses = 0;
        $expiredProcessIds = [];

        $this->db->sql_query('BEGIN');

        $res = $this->db->exec_SELECTquery(
            'process_id,ttl',
            'tx_crawler_process',
            'active=1 AND deleted=0'
        );

        $currentTime = $this->getCurrentTime();

        while ($row = $this->db->sql_fetch_assoc($res)) {
            if ($row['ttl'] < $currentTime) {
                $expiredProcessIds[] = $row['process_id'];
                continue;
            }
            $runningProcesses++;
        }

        $processLimit = intval($this->extensionSettings['processLimit']);
        $acquired = $runningProcesses < $processLimit;

        if ($acquired) {
            // if there are less than allowed active processes then add a new one
            $this->CLI_debug("add process " . $this->CLI_buildProcessId() . " (" . ($runningProcesses + 1) . "/" . $processLimit . ")");

            // create new process record
            $this->db->exec_INSERTquery(
                'tx_crawler_process',
                [
                    'process_id' => $id,
                    'active' => '1',
                    'ttl' => ($currentTime + intval($this->extensionSettings['processMaxRunTime'])),
                    'system_process_id' => $systemProcessId
                ]
            );
        } else {
            $this->CLI_debug("Processlimit reached (" . ($runningProcesses) . "/" . $processLimit . ")");
        }

        $this->CLI_releaseProcesses($expiredProcessIds, true); // maybe this should be somehow included into the current lock
        $this->CLI_deleteProcessesMarkedDeleted();

        $this->db->sql_query('COMMIT');

        return $acquired;
    }
2484
2485
    /**
     * Release a process and the required resources
     *
     * Besides releasing the requested processes, this also frees the queue
     * entries of processes that are already inactive and flags inactive
     * processes without pending queue entries as deleted.
     *
     * @param  mixed    $releaseIds   string with a single process-id or array with multiple process-ids
     * @param  boolean  $withinLock   show whether the DB-actions are included within an existing lock
     * @return boolean  false if there was nothing to release, true otherwise
     */
    public function CLI_releaseProcesses($releaseIds, $withinLock = false)
    {
        if (!is_array($releaseIds)) {
            $releaseIds = [$releaseIds];
        }

        if (count($releaseIds) === 0) {
            return false;   //nothing to release
        }

        // Quote every id through the database layer; the ids are used in two IN (...) lists below.
        $quotedIds = [];
        foreach ($releaseIds as $releaseId) {
            $quotedIds[] = $this->db->fullQuoteStr($releaseId, 'tx_crawler_process');
        }
        $quotedIdList = implode(',', $quotedIds);

        if (!$withinLock) {
            $this->db->sql_query('BEGIN');
        }

        // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup
        // this ensures that a single process can't mess up the entire process table

        // mark all processes as deleted which have no "waiting" queue-entires and which are not active
        $this->db->exec_UPDATEquery(
            'tx_crawler_queue',
            'process_id IN (SELECT process_id FROM tx_crawler_process WHERE active=0 AND deleted=0)',
            [
                'process_scheduled' => 0,
                'process_id' => ''
            ]
        );
        $this->db->exec_UPDATEquery(
            'tx_crawler_process',
            'active=0 AND deleted=0
            AND NOT EXISTS (
                SELECT * FROM tx_crawler_queue
                WHERE tx_crawler_queue.process_id = tx_crawler_process.process_id
                AND tx_crawler_queue.exec_time = 0
            )',
            [
                'deleted' => '1',
                'system_process_id' => 0
            ]
        );
        // mark all requested processes as non-active
        $this->db->exec_UPDATEquery(
            'tx_crawler_process',
            'process_id IN (' . $quotedIdList . ') AND deleted=0',
            [
                'active' => '0'
            ]
        );
        // hand the not yet executed queue entries of the requested processes back to the queue
        $this->db->exec_UPDATEquery(
            'tx_crawler_queue',
            'exec_time=0 AND process_id IN (' . $quotedIdList . ')',
            [
                'process_scheduled' => 0,
                'process_id' => ''
            ]
        );

        if (!$withinLock) {
            $this->db->sql_query('COMMIT');
        }

        return true;
    }
2554
2555
    /**
     * Removes all rows from tx_crawler_process that are flagged with deleted = 1.
     *
     * @return void
     */
    public function CLI_deleteProcessesMarkedDeleted()
    {
        $processTable = 'tx_crawler_process';
        $flaggedAsDeleted = 'deleted = 1';

        $this->db->exec_DELETEquery($processTable, $flaggedAsDeleted);
    }
2564
2565
    /**
     * Check if there are still resources left for the process with the given id
     * Used to determine timeouts and to ensure a proper cleanup if there's a timeout
     *
     * Only a single SELECT is issued, so no transaction is opened here.
     *
     * @param  string  $pid identification string for the process
     * @return boolean determines if the process is still active / has resources
     */
    public function CLI_checkIfProcessIsActive($pid)
    {
        $res = $this->db->exec_SELECTquery(
            'process_id,active,ttl',
            'tx_crawler_process',
            // quote the id through the database layer instead of concatenating it raw
            'process_id = ' . $this->db->fullQuoteStr($pid, 'tx_crawler_process') . ' AND deleted=0',
            '',
            'ttl',
            '0,1'
        );

        // A failed query is treated like "no such process".
        if (!$res) {
            return false;
        }

        $row = $this->db->sql_fetch_assoc($res);
        if (!$row) {
            return false;
        }

        return intval($row['active']) == 1;
    }
2593
2594
    /**
     * Returns the id of the current process.
     *
     * The id is generated on first use (short md5 of the current microtime)
     * and stored in $this->processID, so later calls return the same value.
     *
     * @return string  the ID
     */
    public function CLI_buildProcessId()
    {
        if ($this->processID) {
            return $this->processID;
        }

        $this->processID = GeneralUtility::shortMD5($this->microtime(true));

        return $this->processID;
    }
2606
2607
    /**
     * Thin wrapper around PHP's native microtime().
     * NOTE(review): presumably exists so the timestamp can be replaced in tests - confirm.
     *
     * @param bool $get_as_float passed through to microtime()
     *
     * @return mixed whatever microtime() returns for the given flag
     */
    protected function microtime($get_as_float = false)
    {
        $timestamp = microtime($get_as_float);

        return $timestamp;
    }
2616
2617
    /**
     * Prints a message followed by a line break to the stdout
     * (only if the "processDebug" extension setting is enabled)
     *
     * @param  string $msg  the message
     */
    public function CLI_debug($msg)
    {
        $debugEnabled = intval($this->extensionSettings['processDebug']);
        if (!$debugEnabled) {
            return;
        }

        echo $msg . "\n";
        flush();
    }
2629
2630
    /**
     * Get URL content by making direct request to TYPO3.
     *
     * Builds a shell command that runs the extension's cli/bootstrap.php with
     * the configured PHP binary and passes the frontend base path, the URL and
     * the serialized, base64 encoded request headers as arguments.
     *
     * @param  string $url          Page URL
     * @param  int    $crawlerId    Crawler-ID
     * @return array  keys "request", "headers" (always empty) and "content";
     *                an empty array if the URL could not be parsed
     */
    protected function sendDirectRequest($url, $crawlerId)
    {
        $urlParts = parse_url($url);
        if (!is_array($urlParts)) {
            return [];
        }

        $requestHeaders = $this->buildRequestHeaderArray($urlParts, $crawlerId);

        // Every argument is escaped on its own and joined with single spaces.
        $command = implode(' ', [
            escapeshellcmd($this->extensionSettings['phpPath']),
            escapeshellarg(ExtensionManagementUtility::extPath('crawler') . 'cli/bootstrap.php'),
            escapeshellarg($this->getFrontendBasePath()),
            escapeshellarg($url),
            escapeshellarg(base64_encode(serialize($requestHeaders)))
        ]);

        // Log the URL together with the time the command took.
        $startedAt = microtime(true);
        $content = $this->executeShellCommand($command);
        $this->log($url . ' ' . (microtime(true) - $startedAt));

        return [
            'request' => implode("\r\n", $requestHeaders) . "\r\n\r\n",
            'headers' => '',
            'content' => $content
        ];
    }
2668
2669
    /**
     * Cleans up entries that stayed for too long in the queue. These are:
     * - processed entries older than the "cleanUpProcessedAge" setting (in days)
     * - entries whose scheduled time is older than the "cleanUpScheduledAge" setting (in days)
     *
     * @return void
     *
     * TODO: Should be switched back to protected - TNM 2018-11-16
     */
    public function cleanUpOldQueueEntries()
    {
        $secondsPerDay = 86400; // 24*60*60 Seconds in 24 hours
        $processedAgeInSeconds = $this->extensionSettings['cleanUpProcessedAge'] * $secondsPerDay;
        $scheduledAgeInSeconds = $this->extensionSettings['cleanUpScheduledAge'] * $secondsPerDay;

        $now = time();
        $processedBefore = $now - $processedAgeInSeconds;
        $scheduledBefore = $now - $scheduledAgeInSeconds;

        $this->flushQueue(
            '(exec_time<>0 AND exec_time<' . $processedBefore . ') OR scheduled<=' . $scheduledBefore
        );
    }
2687
2688
    /**
     * Initializes a TypoScript Frontend necessary for using TypoScript and TypoLink functions
     *
     * Creates a TypoScriptFrontendController for the given page, registers it
     * as $GLOBALS['TSFE'] and runs its initialisation steps in a fixed order.
     *
     * @param int $id
     * @param int $typeNum
     *
     * @throws \TYPO3\CMS\Core\Error\Http\ServiceUnavailableException
     *
     * @return void
     */
    protected function initTSFE($id = 1, $typeNum = 0)
    {
        EidUtility::initTCA();
        $timeTracker = GeneralUtility::makeInstance(TimeTracker::class);
        $timeTracker->start();

        // Register the controller globally before any of its methods run; the
        // local variable refers to the very same object.
        $tsfe = GeneralUtility::makeInstance(TypoScriptFrontendController::class, $GLOBALS['TYPO3_CONF_VARS'], $id, $typeNum);
        $GLOBALS['TSFE'] = $tsfe;

        $tsfe->sys_page = GeneralUtility::makeInstance(PageRepository::class);
        $tsfe->sys_page->init(true);
        $tsfe->connectToDB();
        $tsfe->initFEuser();
        $tsfe->determineId();
        $tsfe->initTemplate();
        $tsfe->rootLine = $tsfe->sys_page->getRootLine($id, '');
        $tsfe->getConfigArray();
        PageGenerator::pagegenInit();
    }
2715
2716
    /**
     * Returns a md5 hash generated from a serialized configuration array.
     * The keys "paramExpanded" and "URLs" are left out of the hash.
     *
     * @param array $configuration
     *
     * @return string
     */
    protected function getConfigurationHash(array $configuration)
    {
        foreach (['paramExpanded', 'URLs'] as $ignoredKey) {
            unset($configuration[$ignoredKey]);
        }

        $serialized = serialize($configuration);

        return md5($serialized);
    }
2728
2729
    /**
     * Check whether the Crawling Protocol should be http or https
     *
     * A crawler configuration of -1 yields false, 1 yields true, and 0 hands
     * the decision to the page configuration, which is returned unchanged.
     * Every other value yields false.
     *
     * @param $crawlerConfiguration
     * @param $pageConfiguration
     *
     * @return bool
     */
    protected function isCrawlingProtocolHttps($crawlerConfiguration, $pageConfiguration)
    {
        // Loose comparisons on purpose: they mirror the semantics of a switch statement.
        if ($crawlerConfiguration == -1) {
            return false;
        }

        if ($crawlerConfiguration == 0) {
            return $pageConfiguration;
        }

        return $crawlerConfiguration == 1;
    }
2749
}
2750