@@ -53,7 +53,7 @@ |
||
| 53 | 53 | * @param string $uri |
| 54 | 54 | * @param TypoScriptFrontendController $frontend |
| 55 | 55 | * |
| 56 | - * @return array |
|
| 56 | + * @return string[] |
|
| 57 | 57 | * |
| 58 | 58 | * @throws \Exception |
| 59 | 59 | * |
@@ -209,7 +209,7 @@ |
||
| 209 | 209 | /** |
| 210 | 210 | * Determines if a page is queued |
| 211 | 211 | * |
| 212 | - * @param $uid |
|
| 212 | + * @param integer $uid |
|
| 213 | 213 | * @param bool $unprocessed_only |
| 214 | 214 | * @param bool $timed_only |
| 215 | 215 | * @param bool $timestamp |
@@ -1054,8 +1054,9 @@ discard block |
||
| 1054 | 1054 | $isAvailable = false; |
| 1055 | 1055 | $userArray = \TYPO3\CMS\Backend\Utility\BackendUtility::getRecordsByField('be_users', 'username', '_cli_crawler'); |
| 1056 | 1056 | |
| 1057 | - if (is_array($userArray)) |
|
| 1058 | - $isAvailable = true; |
|
| 1057 | + if (is_array($userArray)) { |
|
| 1058 | + $isAvailable = true; |
|
| 1059 | + } |
|
| 1059 | 1060 | |
| 1060 | 1061 | return $isAvailable; |
| 1061 | 1062 | } |
@@ -1073,8 +1074,9 @@ discard block |
||
| 1073 | 1074 | $isAvailable = false; |
| 1074 | 1075 | $userArray = \TYPO3\CMS\Backend\Utility\BackendUtility::getRecordsByField('be_users', 'username', '_cli_crawler'); |
| 1075 | 1076 | |
| 1076 | - if (is_array($userArray) && $userArray[0]['admin'] == 0) |
|
| 1077 | - $isAvailable = true; |
|
| 1077 | + if (is_array($userArray) && $userArray[0]['admin'] == 0) { |
|
| 1078 | + $isAvailable = true; |
|
| 1079 | + } |
|
| 1078 | 1080 | |
| 1079 | 1081 | return $isAvailable; |
| 1080 | 1082 | } |
@@ -1,4 +1,7 @@ |
||
| 1 | -<?php if (!defined('TYPO3_MODE')) die ('Access denied.'); ?> |
|
| 1 | +<?php if (!defined('TYPO3_MODE')) { |
|
| 2 | + die ('Access denied.'); |
|
| 3 | +} |
|
| 4 | +?> |
|
| 2 | 5 | |
| 3 | 6 | Page: |
| 4 | 7 | <?php for($currentPageOffset = 0; $currentPageOffset < $this->getTotalPagesCount(); $currentPageOffset++ ){ ?> |
@@ -1,4 +1,7 @@ discard block |
||
| 1 | -<?php if (!defined('TYPO3_MODE')) die ('Access denied.'); ?> |
|
| 1 | +<?php if (!defined('TYPO3_MODE')) { |
|
| 2 | + die ('Access denied.'); |
|
| 3 | +} |
|
| 4 | +?> |
|
| 2 | 5 | |
| 3 | 6 | <br /> |
| 4 | 7 | <div id="controll-panel"> |
@@ -69,8 +72,8 @@ discard block |
||
| 69 | 72 | </div> |
| 70 | 73 | <?php elseif ($process->getState() == 'cancelled'): ?> |
| 71 | 74 | <?php echo $this->getLLLabel('LLL:EXT:crawler/modfunc1/locallang.xml:labels.process.cancelled'); ?> |
| 72 | 75 | <?php else: ?> |
| 73 | 76 | <?php echo $this->getLLLabel('LLL:EXT:crawler/modfunc1/locallang.xml:labels.process.success'); ?> |
| 74 | 77 | <?php endif; ?> |
| 75 | 78 | </td> |
| 76 | 79 | </tr> |
@@ -76,7 +76,9 @@ |
||
| 76 | 76 | if ($params['pObj']->applicationData['tx_crawler']['running']) { |
| 77 | 77 | $grList = $params['pObj']->applicationData['tx_crawler']['parameters']['feUserGroupList']; |
| 78 | 78 | if ($grList) { |
| 79 | - if (!is_array($params['pObj']->fe_user->user)) $params['pObj']->fe_user->user = array(); |
|
| 79 | + if (!is_array($params['pObj']->fe_user->user)) { |
|
| 80 | + $params['pObj']->fe_user->user = array(); |
|
| 81 | + } |
|
| 80 | 82 | $params['pObj']->fe_user->user['usergroup'] = $grList; |
| 81 | 83 | $params['pObj']->applicationData['tx_crawler']['log'][] = 'User Groups: '.$grList; |
| 82 | 84 | } |
@@ -424,7 +424,7 @@ discard block |
||
| 424 | 424 | * |
| 425 | 425 | * @param string $piString PI to test |
| 426 | 426 | * @param array $incomingProcInstructions Processing instructions |
| 427 | - * @return boolean TRUE if found |
|
| 427 | + * @return boolean|null TRUE if found |
|
| 428 | 428 | */ |
| 429 | 429 | public function drawURLs_PIfilter($piString, array $incomingProcInstructions) { |
| 430 | 430 | if (empty($incomingProcInstructions)) { |
@@ -632,6 +632,9 @@ discard block |
||
| 632 | 632 | return $baseUrl; |
| 633 | 633 | } |
| 634 | 634 | |
| 635 | + /** |
|
| 636 | + * @param integer $rootid |
|
| 637 | + */ |
|
| 635 | 638 | function getConfigurationsForBranch($rootid, $depth) { |
| 636 | 639 | |
| 637 | 640 | $configurationsForBranch = array(); |
@@ -735,6 +738,7 @@ discard block |
||
| 735 | 738 | * |
| 736 | 739 | * @param array Array with key (GET var name) and values (value of GET var which is configuration for expansion) |
| 737 | 740 | * @param integer Current page ID |
| 741 | + * @param integer $pid |
|
| 738 | 742 | * @return array Array with key (GET var name) with the value being an array of all possible values for that key. |
| 739 | 743 | */ |
| 740 | 744 | function expandParameters($paramArray, $pid) { |
@@ -853,7 +857,7 @@ discard block |
||
| 853 | 857 | * The number of URLs will be the multiplication of the number of parameter values for each key |
| 854 | 858 | * |
| 855 | 859 | * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values |
| 856 | - * @param array $urls URLs accumulated in this array (for recursion) |
|
| 860 | + * @param string[] $urls URLs accumulated in this array (for recursion) |
|
| 857 | 861 | * @return array URLs accumulated, if number of urls exceed 'maxCompileUrls' it will return false as an error! |
| 858 | 862 | */ |
| 859 | 863 | public function compileUrls($paramArray, $urls = array()) { |
@@ -1031,6 +1035,8 @@ discard block |
||
| 1031 | 1035 | * @param integer Scheduled-time |
| 1032 | 1036 | * @param string (optional) configuration hash |
| 1033 | 1037 | * @param bool (optional) skip inner duplication check |
| 1038 | + * @param string $url |
|
| 1039 | + * @param double $tstamp |
|
| 1034 | 1040 | * @return bool true if the url was added, false if it already existed |
| 1035 | 1041 | */ |
| 1036 | 1042 | function addUrl ( |
@@ -1106,7 +1112,6 @@ discard block |
||
| 1106 | 1112 | * If the timestamp is in the future it will check, if the queued entry has exactly the same timestamp |
| 1107 | 1113 | * |
| 1108 | 1114 | * @param int $tstamp |
| 1109 | - * @param string $parameters |
|
| 1110 | 1115 | * @author Fabrizio Branca |
| 1111 | 1116 | * @author Timo Schmidt |
| 1112 | 1117 | * @return array; |
@@ -1470,6 +1475,7 @@ discard block |
||
| 1470 | 1475 | |
| 1471 | 1476 | /** |
| 1472 | 1477 | * @param message |
| 1478 | + * @param string $message |
|
| 1473 | 1479 | */ |
| 1474 | 1480 | protected function log($message) { |
| 1475 | 1481 | if (!empty($this->extensionSettings['logFileName'])) { |
@@ -1483,7 +1489,7 @@ discard block |
||
| 1483 | 1489 | * @param array $url |
| 1484 | 1490 | * @param string $crawlerId |
| 1485 | 1491 | * |
| 1486 | - * @return array |
|
| 1492 | + * @return string[] |
|
| 1487 | 1493 | */ |
| 1488 | 1494 | protected function buildRequestHeaderArray(array $url, $crawlerId) { |
| 1489 | 1495 | $reqHeaders = array(); |
@@ -1586,6 +1592,12 @@ discard block |
||
| 1586 | 1592 | * @param boolean If set (and submitcrawlUrls is false) will fill $downloadUrls with entries) |
| 1587 | 1593 | * @param array Array of processing instructions |
| 1588 | 1594 | * @param array Array of configuration keys |
| 1595 | + * @param integer $id |
|
| 1596 | + * @param integer $depth |
|
| 1597 | + * @param integer $scheduledTime |
|
| 1598 | + * @param integer $reqMinute |
|
| 1599 | + * @param boolean $submitCrawlUrls |
|
| 1600 | + * @param boolean $downloadCrawlUrls |
|
| 1589 | 1601 | * @return string HTML code |
| 1590 | 1602 | */ |
| 1591 | 1603 | function getPageTreeAndUrls( |
@@ -1740,6 +1752,7 @@ discard block |
||
| 1740 | 1752 | * |
| 1741 | 1753 | * @param array Page row |
| 1742 | 1754 | * @param string Page icon and title for row |
| 1755 | + * @param string $pageTitleAndIcon |
|
| 1743 | 1756 | * @return string HTML <tr> content (one or more) |
| 1744 | 1757 | */ |
| 1745 | 1758 | public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
@@ -2040,7 +2053,7 @@ discard block |
||
| 2040 | 2053 | /** |
| 2041 | 2054 | * Function executed by crawler_im.php cli script. |
| 2042 | 2055 | * |
| 2043 | - * @return bool |
|
| 2056 | + * @return null|boolean |
|
| 2044 | 2057 | */ |
| 2045 | 2058 | function CLI_main_flush() { |
| 2046 | 2059 | $this->setAccessMode('cli_flush'); |
@@ -2097,7 +2110,7 @@ discard block |
||
| 2097 | 2110 | * @param int $countInARun |
| 2098 | 2111 | * @param int $sleepTime |
| 2099 | 2112 | * @param int $sleepAfterFinish |
| 2100 | - * @return string Status message |
|
| 2113 | + * @return integer Status message |
|
| 2101 | 2114 | */ |
| 2102 | 2115 | public function CLI_run($countInARun, $sleepTime, $sleepAfterFinish) { |
| 2103 | 2116 | $result = 0; |
@@ -2369,6 +2382,7 @@ discard block |
||
| 2369 | 2382 | * Used to determine timeouts and to ensure a proper cleanup if there's a timeout |
| 2370 | 2383 | * |
| 2371 | 2384 | * @param string identification string for the process |
| 2385 | + * @param string $pid |
|
| 2372 | 2386 | * @return boolean determines if the process is still active / has resources |
| 2373 | 2387 | * |
| 2374 | 2388 | * FIXME: Please remove Transaction, not needed as only a select query. |
@@ -2406,7 +2420,7 @@ discard block |
||
| 2406 | 2420 | /** |
| 2407 | 2421 | * @param bool $get_as_float |
| 2408 | 2422 | * |
| 2409 | - * @return mixed |
|
| 2423 | + * @return string |
|
| 2410 | 2424 | */ |
| 2411 | 2425 | protected function microtime($get_as_float = false ) |
| 2412 | 2426 | { |
@@ -642,7 +642,9 @@ discard block |
||
| 642 | 642 | $sets = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.']; |
| 643 | 643 | if(is_array($sets)) { |
| 644 | 644 | foreach($sets as $key=>$value) { |
| 645 | - if(!is_array($value)) continue; |
|
| 645 | + if(!is_array($value)) { |
|
| 646 | + continue; |
|
| 647 | + } |
|
| 646 | 648 | $configurationsForBranch[] = substr($key,-1)=='.'?substr($key,0,-1):$key; |
| 647 | 649 | } |
| 648 | 650 | |
@@ -990,7 +992,9 @@ discard block |
||
| 990 | 992 | */ |
| 991 | 993 | function addQueueEntry_callBack($setId,$params,$callBack,$page_id=0,$schedule=0) { |
| 992 | 994 | |
| 993 | - if (!is_array($params)) $params = array(); |
|
| 995 | + if (!is_array($params)) { |
|
| 996 | + $params = array(); |
|
| 997 | + } |
|
| 994 | 998 | $params['_CALLBACKOBJ'] = $callBack; |
| 995 | 999 | |
| 996 | 1000 | // Compile value array: |
@@ -1092,7 +1096,7 @@ discard block |
||
| 1092 | 1096 | $rows[] = $uid; |
| 1093 | 1097 | $urlAdded = true; |
| 1094 | 1098 | tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue',$this->setID,array('uid' => $uid, 'fieldArray' => $fieldArray)); |
| 1095 | - }else{ |
|
| 1099 | + } else{ |
|
| 1096 | 1100 | tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue',$this->setID,array('rows' => $rows, 'fieldArray' => $fieldArray)); |
| 1097 | 1101 | } |
| 1098 | 1102 | } |
@@ -1122,7 +1126,7 @@ discard block |
||
| 1122 | 1126 | $timeBegin = $currentTime - 100; |
| 1123 | 1127 | $timeEnd = $currentTime + 100; |
| 1124 | 1128 | $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '. $currentTime.') '; |
| 1125 | - }else{ |
|
| 1129 | + } else{ |
|
| 1126 | 1130 | $where = 'scheduled <= ' . $currentTime; |
| 1127 | 1131 | } |
| 1128 | 1132 | } elseif ($tstamp > $currentTime) { |
@@ -1314,18 +1318,24 @@ discard block |
||
| 1314 | 1318 | */ |
| 1315 | 1319 | function requestUrl($originalUrl, $crawlerId, $timeout=2, $recursion=10) { |
| 1316 | 1320 | |
| 1317 | - if (!$recursion) return false; |

| 1321 | + if (!$recursion) { |

| 1322 | + return false; |

| 1323 | + } |

| 1318 | 1324 | |
| 1319 | 1325 | // Parse URL, checking for scheme: |
| 1320 | 1326 | $url = parse_url($originalUrl); |
| 1321 | 1327 | |
| 1322 | 1328 | if ($url === FALSE) { |
| 1323 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Could not parse_url() for string "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |

| 1329 | + if (TYPO3_DLOG) { |

| 1330 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Could not parse_url() for string "%s"', $originalUrl), 'crawler', 4, array('crawlerId' => $crawlerId)); |

| 1331 | + } |

| 1324 | 1332 | return FALSE; |
| 1325 | 1333 | } |
| 1326 | 1334 | |
| 1327 | 1335 | if (!in_array($url['scheme'], array('','http','https'))) { |
| 1328 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |

| 1336 | + if (TYPO3_DLOG) { |

| 1337 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $originalUrl), 'crawler', 4, array('crawlerId' => $crawlerId)); |

| 1338 | + } |

| 1329 | 1339 | return FALSE; |
| 1330 | 1340 | } |
| 1331 | 1341 | |
@@ -1359,7 +1369,9 @@ discard block |
||
| 1359 | 1369 | $fp = fsockopen($host, $port, $errno, $errstr, $timeout); |
| 1360 | 1370 | |
| 1361 | 1371 | if (!$fp) { |
| 1362 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
| 1372 | + if (TYPO3_DLOG) { |
|
| 1373 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
| 1374 | + } |
|
| 1363 | 1375 | return FALSE; |
| 1364 | 1376 | } else { |
| 1365 | 1377 | // Request message: |
@@ -1387,7 +1399,9 @@ discard block |
||
| 1387 | 1399 | if (is_array($newRequestUrl)) { |
| 1388 | 1400 | $result = array_merge(array('parentRequest'=>$result), $newRequestUrl); |
| 1389 | 1401 | } else { |
| 1390 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
| 1402 | + if (TYPO3_DLOG) { |
|
| 1403 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
| 1404 | + } |
|
| 1391 | 1405 | return FALSE; |
| 1392 | 1406 | } |
| 1393 | 1407 | } |
@@ -1510,20 +1524,32 @@ discard block |
||
| 1510 | 1524 | * @return string URL from redirection |
| 1511 | 1525 | */ |
| 1512 | 1526 | protected function getRequestUrlFrom302Header($headers,$user='',$pass='') { |
| 1513 | - if(!is_array($headers)) return false; |
|
| 1514 | - if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) return false; |
|
| 1527 | + if(!is_array($headers)) { |
|
| 1528 | + return false; |
|
| 1529 | + } |
|
| 1530 | + if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) { |
|
| 1531 | + return false; |
|
| 1532 | + } |
|
| 1515 | 1533 | |
| 1516 | 1534 | foreach($headers as $hl) { |
| 1517 | 1535 | $tmp = explode(": ",$hl); |
| 1518 | 1536 | $header[trim($tmp[0])] = trim($tmp[1]); |
| 1519 | - if(trim($tmp[0])=='Location') break; |
|
| 1537 | + if(trim($tmp[0])=='Location') { |
|
| 1538 | + break; |
|
| 1539 | + } |
|
| 1540 | + } |
|
| 1541 | + if(!array_key_exists('Location',$header)) { |
|
| 1542 | + return false; |
|
| 1520 | 1543 | } |
| 1521 | - if(!array_key_exists('Location',$header)) return false; |
|
| 1522 | 1544 | |
| 1523 | 1545 | if($user!='') { |
| 1524 | - if(!($tmp = parse_url($header['Location']))) return false; |
|
| 1546 | + if(!($tmp = parse_url($header['Location']))) { |
|
| 1547 | + return false; |
|
| 1548 | + } |
|
| 1525 | 1549 | $newUrl = $tmp['scheme'] . '://' . $user . ':' . $pass . '@' . $tmp['host'] . $tmp['path']; |
| 1526 | - if($tmp['query']!='') $newUrl .= '?' . $tmp['query']; |
|
| 1550 | + if($tmp['query']!='') { |
|
| 1551 | + $newUrl .= '?' . $tmp['query']; |
|
| 1552 | + } |
|
| 1527 | 1553 | } else { |
| 1528 | 1554 | $newUrl = $header['Location']; |
| 1529 | 1555 | } |
@@ -1974,7 +2000,7 @@ discard block |
||
| 1974 | 2000 | $configurations = $this->getUrlsForPageId($pageId); |
| 1975 | 2001 | if(is_array($configurations)){ |
| 1976 | 2002 | $configurationKeys = array_keys($configurations); |
| 1977 | - }else{ |
|
| 2003 | + } else{ |
|
| 1978 | 2004 | $configurationKeys = array(); |
| 1979 | 2005 | } |
| 1980 | 2006 | } |
@@ -2306,7 +2332,9 @@ discard block |
||
| 2306 | 2332 | return false; //nothing to release |
| 2307 | 2333 | } |
| 2308 | 2334 | |
| 2309 | - if(!$withinLock) $this->db->sql_query('BEGIN'); |
|
| 2335 | + if(!$withinLock) { |
|
| 2336 | + $this->db->sql_query('BEGIN'); |
|
| 2337 | + } |
|
| 2310 | 2338 | |
| 2311 | 2339 | // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup |
| 2312 | 2340 | // this ensures that a single process can't mess up the entire process table |
@@ -2350,7 +2378,9 @@ discard block |
||
| 2350 | 2378 | ) |
| 2351 | 2379 | ); |
| 2352 | 2380 | |
| 2353 | - if(!$withinLock) $this->db->sql_query('COMMIT'); |
|
| 2381 | + if(!$withinLock) { |
|
| 2382 | + $this->db->sql_query('COMMIT'); |
|
| 2383 | + } |
|
| 2354 | 2384 | |
| 2355 | 2385 | return true; |
| 2356 | 2386 | } |
@@ -1,5 +1,7 @@ |
||
| 1 | 1 | <?php |
| 2 | -if (!defined('TYPO3_REQUESTTYPE')) die('You cannot run this script directly!'); |
|
| 2 | +if (!defined('TYPO3_REQUESTTYPE')) { |
|
| 3 | + die('You cannot run this script directly!'); |
|
| 4 | +} |
|
| 3 | 5 | |
| 4 | 6 | $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
| 5 | 7 | $crawlerObj->CLI_main_flush($_SERVER["argv"]); |
@@ -1,5 +1,7 @@ |
||
| 1 | 1 | <?php |
| 2 | -if (!defined('TYPO3_REQUESTTYPE')) die('You cannot run this script directly!'); |
|
| 2 | +if (!defined('TYPO3_REQUESTTYPE')) { |
|
| 3 | + die('You cannot run this script directly!'); |
|
| 4 | +} |
|
| 3 | 5 | |
| 4 | 6 | $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
| 5 | 7 | $crawlerObj->CLI_main_im($_SERVER["argv"]); |