@@ -83,21 +83,21 @@ |
||
83 | 83 | |
84 | 84 | // input for sleepTime |
85 | 85 | $fieldId = 'task_sleepTime'; |
86 | - $fieldCode = '<input type="text" name="tx_scheduler[sleepTime]" id="' . $fieldId . '" value="' . htmlentities($taskInfo['sleepTime']) . '" />'; |
|
86 | + $fieldCode = '<input type="text" name="tx_scheduler[sleepTime]" id="'.$fieldId.'" value="'.htmlentities($taskInfo['sleepTime']).'" />'; |
|
87 | 87 | $additionalFields[$fieldId] = array( |
88 | 88 | 'code' => $fieldCode, |
89 | 89 | 'label' => 'LLL:EXT:crawler/Resources/Private/Language/Backend.xlf:crawler_im.sleepTime' |
90 | 90 | ); |
91 | 91 | // input for sleepAfterFinish |
92 | 92 | $fieldId = 'task_sleepAfterFinish'; |
93 | - $fieldCode = '<input type="text" name="tx_scheduler[sleepAfterFinish]" id="' . $fieldId . '" value="' . htmlentities($taskInfo['sleepAfterFinish']) . '" />'; |
|
93 | + $fieldCode = '<input type="text" name="tx_scheduler[sleepAfterFinish]" id="'.$fieldId.'" value="'.htmlentities($taskInfo['sleepAfterFinish']).'" />'; |
|
94 | 94 | $additionalFields[$fieldId] = array( |
95 | 95 | 'code' => $fieldCode, |
96 | 96 | 'label' => 'LLL:EXT:crawler/Resources/Private/Language/Backend.xlf:crawler_im.sleepAfterFinish' |
97 | 97 | ); |
98 | 98 | // input for countInARun |
99 | 99 | $fieldId = 'task_countInARun'; |
100 | - $fieldCode = '<input type="text" name="tx_scheduler[countInARun]" id="' . $fieldId . '" value="' . htmlentities($taskInfo['countInARun']) . '" />'; |
|
100 | + $fieldCode = '<input type="text" name="tx_scheduler[countInARun]" id="'.$fieldId.'" value="'.htmlentities($taskInfo['countInARun']).'" />'; |
|
101 | 101 | $additionalFields[$fieldId] = array( |
102 | 102 | 'code' => $fieldCode, |
103 | 103 | 'label' => 'LLL:EXT:crawler/Resources/Private/Language/Backend.xlf:crawler_im.countInARun' |
@@ -88,7 +88,7 @@ discard block |
||
88 | 88 | |
89 | 89 | // input for startPage |
90 | 90 | $fieldId = 'task_startPage'; |
91 | - $fieldCode = '<input name="tx_scheduler[startPage]" type="text" id="' . $fieldId . '" value="' . $task->startPage . '" />'; |
|
91 | + $fieldCode = '<input name="tx_scheduler[startPage]" type="text" id="'.$fieldId.'" value="'.$task->startPage.'" />'; |
|
92 | 92 | $additionalFields[$fieldId] = array( |
93 | 93 | 'code' => $fieldCode, |
94 | 94 | 'label' => 'LLL:EXT:crawler/Resources/Private/Language/Backend.xlf:crawler_im.startPage' |
@@ -104,10 +104,10 @@ discard block |
||
104 | 104 | '4' => $GLOBALS['LANG']->sL('LLL:EXT:lang/Resources/Private/Language/locallang_core.xlf:labels.depth_4'), |
105 | 105 | '99' => $GLOBALS['LANG']->sL('LLL:EXT:lang/Resources/Private/Language/locallang_core.xlf:labels.depth_infi'), |
106 | 106 | ); |
107 | - $fieldCode = '<select name="tx_scheduler[depth]" id="' . $fieldId . '">'; |
|
107 | + $fieldCode = '<select name="tx_scheduler[depth]" id="'.$fieldId.'">'; |
|
108 | 108 | |
109 | 109 | foreach ($fieldValueArray as $key => $label) { |
110 | - $fieldCode .= "\t" . '<option value="' . $key . '"' . (($key == $task->depth) ? ' selected="selected"' : '') . '>' . $label . '</option>'; |
|
110 | + $fieldCode .= "\t".'<option value="'.$key.'"'.(($key == $task->depth) ? ' selected="selected"' : '').'>'.$label.'</option>'; |
|
111 | 111 | } |
112 | 112 | |
113 | 113 | $fieldCode .= '</select>'; |
@@ -119,10 +119,10 @@ discard block |
||
119 | 119 | // combobox for configuration records |
120 | 120 | $recordsArray = $this->getCrawlerConfigurationRecords(); |
121 | 121 | $fieldId = 'task_configuration'; |
122 | - $fieldCode = '<select name="tx_scheduler[configuration][]" multiple="multiple" id="' . $fieldId . '">'; |
|
123 | - $fieldCode .= "\t" . '<option value=""></option>'; |
|
122 | + $fieldCode = '<select name="tx_scheduler[configuration][]" multiple="multiple" id="'.$fieldId.'">'; |
|
123 | + $fieldCode .= "\t".'<option value=""></option>'; |
|
124 | 124 | for ($i = 0; $i < count($recordsArray); $i++) { |
125 | - $fieldCode .= "\t" . '<option ' . $this->getSelectedState($task->configuration, $recordsArray[$i]['name']) . 'value="' . $recordsArray[$i]['name'] . '">' . $recordsArray[$i]['name'] . '</option>'; |
|
125 | + $fieldCode .= "\t".'<option '.$this->getSelectedState($task->configuration, $recordsArray[$i]['name']).'value="'.$recordsArray[$i]['name'].'">'.$recordsArray[$i]['name'].'</option>'; |
|
126 | 126 | } |
127 | 127 | $fieldCode .= '</select>'; |
128 | 128 | |
@@ -165,7 +165,7 @@ discard block |
||
165 | 165 | $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery( |
166 | 166 | '*', |
167 | 167 | 'tx_crawler_configuration', |
168 | - '1=1' . BackendUtility::deleteClause('tx_crawler_configuration') |
|
168 | + '1=1'.BackendUtility::deleteClause('tx_crawler_configuration') |
|
169 | 169 | ); |
170 | 170 | |
171 | 171 | while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) { |
@@ -424,7 +424,7 @@ discard block |
||
424 | 424 | * |
425 | 425 | * @param string $piString PI to test |
426 | 426 | * @param array $incomingProcInstructions Processing instructions |
427 | - * @return boolean TRUE if found |
|
427 | + * @return boolean|null TRUE if found |
|
428 | 428 | */ |
429 | 429 | public function drawURLs_PIfilter($piString, array $incomingProcInstructions) { |
430 | 430 | if (empty($incomingProcInstructions)) { |
@@ -632,6 +632,9 @@ discard block |
||
632 | 632 | return $baseUrl; |
633 | 633 | } |
634 | 634 | |
635 | + /** |
|
636 | + * @param integer $rootid |
|
637 | + */ |
|
635 | 638 | function getConfigurationsForBranch($rootid, $depth) { |
636 | 639 | |
637 | 640 | $configurationsForBranch = array(); |
@@ -735,6 +738,7 @@ discard block |
||
735 | 738 | * |
736 | 739 | * @param array Array with key (GET var name) and values (value of GET var which is configuration for expansion) |
737 | 740 | * @param integer Current page ID |
741 | + * @param integer $pid |
|
738 | 742 | * @return array Array with key (GET var name) with the value being an array of all possible values for that key. |
739 | 743 | */ |
740 | 744 | function expandParameters($paramArray, $pid) { |
@@ -853,7 +857,7 @@ discard block |
||
853 | 857 | * The number of URLs will be the multiplication of the number of parameter values for each key |
854 | 858 | * |
855 | 859 | * @param array $paramArray Output of expandParameters(): Array with keys (GET var names) and for each an array of values |
856 | - * @param array $urls URLs accumulated in this array (for recursion) |
|
860 | + * @param string[] $urls URLs accumulated in this array (for recursion) |
|
857 | 861 | * @return array URLs accumulated, if number of urls exceed 'maxCompileUrls' it will return false as an error! |
858 | 862 | */ |
859 | 863 | public function compileUrls($paramArray, $urls = array()) { |
@@ -1031,6 +1035,8 @@ discard block |
||
1031 | 1035 | * @param integer Scheduled-time |
1032 | 1036 | * @param string (optional) configuration hash |
1033 | 1037 | * @param bool (optional) skip inner duplication check |
1038 | + * @param string $url |
|
1039 | + * @param double $tstamp |
|
1034 | 1040 | * @return bool true if the url was added, false if it already existed |
1035 | 1041 | */ |
1036 | 1042 | function addUrl ( |
@@ -1106,7 +1112,6 @@ discard block |
||
1106 | 1112 | * If the timestamp is in the future it will check, if the queued entry has exactly the same timestamp |
1107 | 1113 | * |
1108 | 1114 | * @param int $tstamp |
1109 | - * @param string $parameters |
|
1110 | 1115 | * @author Fabrizio Branca |
1111 | 1116 | * @author Timo Schmidt |
1112 | 1117 | * @return array; |
@@ -1470,6 +1475,7 @@ discard block |
||
1470 | 1475 | |
1471 | 1476 | /** |
1472 | 1477 | * @param message |
1478 | + * @param string $message |
|
1473 | 1479 | */ |
1474 | 1480 | protected function log($message) { |
1475 | 1481 | if (!empty($this->extensionSettings['logFileName'])) { |
@@ -1483,7 +1489,7 @@ discard block |
||
1483 | 1489 | * @param array $url |
1484 | 1490 | * @param string $crawlerId |
1485 | 1491 | * |
1486 | - * @return array |
|
1492 | + * @return string[] |
|
1487 | 1493 | */ |
1488 | 1494 | protected function buildRequestHeaderArray(array $url, $crawlerId) { |
1489 | 1495 | $reqHeaders = array(); |
@@ -1586,6 +1592,12 @@ discard block |
||
1586 | 1592 | * @param boolean If set (and submitcrawlUrls is false) will fill $downloadUrls with entries) |
1587 | 1593 | * @param array Array of processing instructions |
1588 | 1594 | * @param array Array of configuration keys |
1595 | + * @param integer $id |
|
1596 | + * @param integer $depth |
|
1597 | + * @param integer $scheduledTime |
|
1598 | + * @param integer $reqMinute |
|
1599 | + * @param boolean $submitCrawlUrls |
|
1600 | + * @param boolean $downloadCrawlUrls |
|
1589 | 1601 | * @return string HTML code |
1590 | 1602 | */ |
1591 | 1603 | function getPageTreeAndUrls( |
@@ -1740,6 +1752,7 @@ discard block |
||
1740 | 1752 | * |
1741 | 1753 | * @param array Page row |
1742 | 1754 | * @param string Page icon and title for row |
1755 | + * @param string $pageTitleAndIcon |
|
1743 | 1756 | * @return string HTML <tr> content (one or more) |
1744 | 1757 | */ |
1745 | 1758 | public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
@@ -2040,7 +2053,7 @@ discard block |
||
2040 | 2053 | /** |
2041 | 2054 | * Function executed by crawler_im.php cli script. |
2042 | 2055 | * |
2043 | - * @return bool |
|
2056 | + * @return null|boolean |
|
2044 | 2057 | */ |
2045 | 2058 | function CLI_main_flush() { |
2046 | 2059 | $this->setAccessMode('cli_flush'); |
@@ -2097,7 +2110,7 @@ discard block |
||
2097 | 2110 | * @param int $countInARun |
2098 | 2111 | * @param int $sleepTime |
2099 | 2112 | * @param int $sleepAfterFinish |
2100 | - * @return string Status message |
|
2113 | + * @return integer Status message |
|
2101 | 2114 | */ |
2102 | 2115 | public function CLI_run($countInARun, $sleepTime, $sleepAfterFinish) { |
2103 | 2116 | $result = 0; |
@@ -2369,6 +2382,7 @@ discard block |
||
2369 | 2382 | * Used to determine timeouts and to ensure a proper cleanup if there's a timeout |
2370 | 2383 | * |
2371 | 2384 | * @param string identification string for the process |
2385 | + * @param string $pid |
|
2372 | 2386 | * @return boolean determines if the process is still active / has resources |
2373 | 2387 | * |
2374 | 2388 | * FIXME: Please remove Transaction, not needed as only a select query. |
@@ -2406,7 +2420,7 @@ discard block |
||
2406 | 2420 | /** |
2407 | 2421 | * @param bool $get_as_float |
2408 | 2422 | * |
2409 | - * @return mixed |
|
2423 | + * @return string |
|
2410 | 2424 | */ |
2411 | 2425 | protected function microtime($get_as_float = false ) |
2412 | 2426 | { |
@@ -1329,7 +1329,7 @@ discard block |
||
1329 | 1329 | return FALSE; |
1330 | 1330 | } |
1331 | 1331 | |
1332 | - // direct request |
|
1332 | + // direct request |
|
1333 | 1333 | if ($this->extensionSettings['makeDirectRequests']) { |
1334 | 1334 | $result = $this->sendDirectRequest($originalUrl, $crawlerId); |
1335 | 1335 | return $result; |
@@ -2360,7 +2360,7 @@ discard block |
||
2360 | 2360 | * |
2361 | 2361 | * @return void |
2362 | 2362 | */ |
2363 | - public function CLI_deleteProcessesMarkedDeleted() { |
|
2363 | + public function CLI_deleteProcessesMarkedDeleted() { |
|
2364 | 2364 | $this->db->exec_DELETEquery('tx_crawler_process', 'deleted = 1'); |
2365 | 2365 | } |
2366 | 2366 |
@@ -642,7 +642,9 @@ discard block |
||
642 | 642 | $sets = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.']; |
643 | 643 | if(is_array($sets)) { |
644 | 644 | foreach($sets as $key=>$value) { |
645 | - if(!is_array($value)) continue; |
|
645 | + if(!is_array($value)) { |
|
646 | + continue; |
|
647 | + } |
|
646 | 648 | $configurationsForBranch[] = substr($key,-1)=='.'?substr($key,0,-1):$key; |
647 | 649 | } |
648 | 650 | |
@@ -990,7 +992,9 @@ discard block |
||
990 | 992 | */ |
991 | 993 | function addQueueEntry_callBack($setId,$params,$callBack,$page_id=0,$schedule=0) { |
992 | 994 | |
993 | - if (!is_array($params)) $params = array(); |
|
995 | + if (!is_array($params)) { |
|
996 | + $params = array(); |
|
997 | + } |
|
994 | 998 | $params['_CALLBACKOBJ'] = $callBack; |
995 | 999 | |
996 | 1000 | // Compile value array: |
@@ -1092,7 +1096,7 @@ discard block |
||
1092 | 1096 | $rows[] = $uid; |
1093 | 1097 | $urlAdded = true; |
1094 | 1098 | tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue',$this->setID,array('uid' => $uid, 'fieldArray' => $fieldArray)); |
1095 | - }else{ |
|
1099 | + } else{ |
|
1096 | 1100 | tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue',$this->setID,array('rows' => $rows, 'fieldArray' => $fieldArray)); |
1097 | 1101 | } |
1098 | 1102 | } |
@@ -1122,7 +1126,7 @@ discard block |
||
1122 | 1126 | $timeBegin = $currentTime - 100; |
1123 | 1127 | $timeEnd = $currentTime + 100; |
1124 | 1128 | $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '. $currentTime.') '; |
1125 | - }else{ |
|
1129 | + } else{ |
|
1126 | 1130 | $where = 'scheduled <= ' . $currentTime; |
1127 | 1131 | } |
1128 | 1132 | } elseif ($tstamp > $currentTime) { |
@@ -1314,18 +1318,24 @@ discard block |
||
1314 | 1318 | */ |
1315 | 1319 | function requestUrl($originalUrl, $crawlerId, $timeout=2, $recursion=10) { |
1316 | 1320 | |
1317 | - if (!$recursion) return false; |
|
1321 | + if (!$recursion) { |
|
1322 | + return false; |
|
1323 | + } |
|
1318 | 1324 | |
1319 | 1325 | // Parse URL, checking for scheme: |
1320 | 1326 | $url = parse_url($originalUrl); |
1321 | 1327 | |
1322 | 1328 | if ($url === FALSE) { |
1323 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Could not parse_url() for string "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1329 | + if (TYPO3_DLOG) { |
|
1330 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Could not parse_url() for string "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1331 | + } |
|
1324 | 1332 | return FALSE; |
1325 | 1333 | } |
1326 | 1334 | |
1327 | 1335 | if (!in_array($url['scheme'], array('','http','https'))) { |
1328 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1336 | + if (TYPO3_DLOG) { |
|
1337 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1338 | + } |
|
1329 | 1339 | return FALSE; |
1330 | 1340 | } |
1331 | 1341 | |
@@ -1359,7 +1369,9 @@ discard block |
||
1359 | 1369 | $fp = fsockopen($host, $port, $errno, $errstr, $timeout); |
1360 | 1370 | |
1361 | 1371 | if (!$fp) { |
1362 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1372 | + if (TYPO3_DLOG) { |
|
1373 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1374 | + } |
|
1363 | 1375 | return FALSE; |
1364 | 1376 | } else { |
1365 | 1377 | // Request message: |
@@ -1387,7 +1399,9 @@ discard block |
||
1387 | 1399 | if (is_array($newRequestUrl)) { |
1388 | 1400 | $result = array_merge(array('parentRequest'=>$result), $newRequestUrl); |
1389 | 1401 | } else { |
1390 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1402 | + if (TYPO3_DLOG) { |
|
1403 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1404 | + } |
|
1391 | 1405 | return FALSE; |
1392 | 1406 | } |
1393 | 1407 | } |
@@ -1510,20 +1524,32 @@ discard block |
||
1510 | 1524 | * @return string URL from redirection |
1511 | 1525 | */ |
1512 | 1526 | protected function getRequestUrlFrom302Header($headers,$user='',$pass='') { |
1513 | - if(!is_array($headers)) return false; |
|
1514 | - if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) return false; |
|
1527 | + if(!is_array($headers)) { |
|
1528 | + return false; |
|
1529 | + } |
|
1530 | + if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) { |
|
1531 | + return false; |
|
1532 | + } |
|
1515 | 1533 | |
1516 | 1534 | foreach($headers as $hl) { |
1517 | 1535 | $tmp = explode(": ",$hl); |
1518 | 1536 | $header[trim($tmp[0])] = trim($tmp[1]); |
1519 | - if(trim($tmp[0])=='Location') break; |
|
1537 | + if(trim($tmp[0])=='Location') { |
|
1538 | + break; |
|
1539 | + } |
|
1540 | + } |
|
1541 | + if(!array_key_exists('Location',$header)) { |
|
1542 | + return false; |
|
1520 | 1543 | } |
1521 | - if(!array_key_exists('Location',$header)) return false; |
|
1522 | 1544 | |
1523 | 1545 | if($user!='') { |
1524 | - if(!($tmp = parse_url($header['Location']))) return false; |
|
1546 | + if(!($tmp = parse_url($header['Location']))) { |
|
1547 | + return false; |
|
1548 | + } |
|
1525 | 1549 | $newUrl = $tmp['scheme'] . '://' . $user . ':' . $pass . '@' . $tmp['host'] . $tmp['path']; |
1526 | - if($tmp['query']!='') $newUrl .= '?' . $tmp['query']; |
|
1550 | + if($tmp['query']!='') { |
|
1551 | + $newUrl .= '?' . $tmp['query']; |
|
1552 | + } |
|
1527 | 1553 | } else { |
1528 | 1554 | $newUrl = $header['Location']; |
1529 | 1555 | } |
@@ -1974,7 +2000,7 @@ discard block |
||
1974 | 2000 | $configurations = $this->getUrlsForPageId($pageId); |
1975 | 2001 | if(is_array($configurations)){ |
1976 | 2002 | $configurationKeys = array_keys($configurations); |
1977 | - }else{ |
|
2003 | + } else{ |
|
1978 | 2004 | $configurationKeys = array(); |
1979 | 2005 | } |
1980 | 2006 | } |
@@ -2306,7 +2332,9 @@ discard block |
||
2306 | 2332 | return false; //nothing to release |
2307 | 2333 | } |
2308 | 2334 | |
2309 | - if(!$withinLock) $this->db->sql_query('BEGIN'); |
|
2335 | + if(!$withinLock) { |
|
2336 | + $this->db->sql_query('BEGIN'); |
|
2337 | + } |
|
2310 | 2338 | |
2311 | 2339 | // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup |
2312 | 2340 | // this ensures that a single process can't mess up the entire process table |
@@ -2350,7 +2378,9 @@ discard block |
||
2350 | 2378 | ) |
2351 | 2379 | ); |
2352 | 2380 | |
2353 | - if(!$withinLock) $this->db->sql_query('COMMIT'); |
|
2381 | + if(!$withinLock) { |
|
2382 | + $this->db->sql_query('COMMIT'); |
|
2383 | + } |
|
2354 | 2384 | |
2355 | 2385 | return true; |
2356 | 2386 | } |
@@ -32,8 +32,8 @@ discard block |
||
32 | 32 | class tx_crawler_lib { |
33 | 33 | |
34 | 34 | var $setID = 0; |
35 | - var $processID =''; |
|
36 | - var $max_CLI_exec_time = 3600; // One hour is max stalled time for the CLI (If the process has had the status "start" for 3600 seconds it will be regarded stalled and a new process is started. |
|
35 | + var $processID = ''; |
|
36 | + var $max_CLI_exec_time = 3600; // One hour is max stalled time for the CLI (If the process has had the status "start" for 3600 seconds it will be regarded stalled and a new process is started. |
|
37 | 37 | |
38 | 38 | var $duplicateTrack = array(); |
39 | 39 | var $downloadUrls = array(); |
@@ -46,9 +46,9 @@ discard block |
||
46 | 46 | var $queueEntries = array(); |
47 | 47 | var $urlList = array(); |
48 | 48 | |
49 | - var $debugMode=FALSE; |
|
49 | + var $debugMode = FALSE; |
|
50 | 50 | |
51 | - var $extensionSettings=array(); |
|
51 | + var $extensionSettings = array(); |
|
52 | 52 | |
53 | 53 | var $MP = false; // mount point |
54 | 54 | |
@@ -72,9 +72,9 @@ discard block |
||
72 | 72 | private $backendUser; |
73 | 73 | |
74 | 74 | const CLI_STATUS_NOTHING_PROCCESSED = 0; |
75 | - const CLI_STATUS_REMAIN = 1; //queue not empty |
|
76 | - const CLI_STATUS_PROCESSED = 2; //(some) queue items where processed |
|
77 | - const CLI_STATUS_ABORTED = 4; //instance didn't finish |
|
75 | + const CLI_STATUS_REMAIN = 1; //queue not empty |
|
76 | + const CLI_STATUS_PROCESSED = 2; //(some) queue items where processed |
|
77 | + const CLI_STATUS_ABORTED = 4; //instance didn't finish |
|
78 | 78 | const CLI_STATUS_POLLABLE_PROCESSED = 8; |
79 | 79 | |
80 | 80 | /** |
@@ -165,7 +165,7 @@ discard block |
||
165 | 165 | $this->extensionSettings['countInARun'] = 100; |
166 | 166 | } |
167 | 167 | |
168 | - $this->extensionSettings['processLimit'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['processLimit'],1,99,1); |
|
168 | + $this->extensionSettings['processLimit'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['processLimit'], 1, 99, 1); |
|
169 | 169 | } |
170 | 170 | |
171 | 171 | /** |
@@ -199,7 +199,7 @@ discard block |
||
199 | 199 | } |
200 | 200 | |
201 | 201 | if (!$skipPage) { |
202 | - if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList('3,4', $pageRow['doktype']) || $pageRow['doktype']>=199) { |
|
202 | + if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList('3,4', $pageRow['doktype']) || $pageRow['doktype'] >= 199) { |
|
203 | 203 | $skipPage = true; |
204 | 204 | $skipMessage = 'Because doktype is not allowed'; |
205 | 205 | } |
@@ -220,13 +220,13 @@ discard block |
||
220 | 220 | if (!$skipPage) { |
221 | 221 | // veto hook |
222 | 222 | if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'])) { |
223 | - foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'] as $key => $func) { |
|
223 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'] as $key => $func) { |
|
224 | 224 | $params = array( |
225 | 225 | 'pageRow' => $pageRow |
226 | 226 | ); |
227 | 227 | // expects "false" if page is ok and "true" or a skipMessage if this page should _not_ be crawled |
228 | 228 | $veto = \TYPO3\CMS\Core\Utility\GeneralUtility::callUserFunction($func, $params, $this); |
229 | - if ($veto !== false) { |
|
229 | + if ($veto !== false) { |
|
230 | 230 | $skipPage = true; |
231 | 231 | if (is_string($veto)) { |
232 | 232 | $skipMessage = $veto; |
@@ -276,9 +276,9 @@ discard block |
||
276 | 276 | * @param string $configurationHash |
277 | 277 | * @return boolean |
278 | 278 | */ |
279 | - protected function noUnprocessedQueueEntriesForPageWithConfigurationHashExist($uid,$configurationHash) { |
|
280 | - $configurationHash = $this->db->fullQuoteStr($configurationHash,'tx_crawler_queue'); |
|
281 | - $res = $this->db->exec_SELECTquery('count(*) as anz','tx_crawler_queue',"page_id=".intval($uid)." AND configuration_hash=".$configurationHash." AND exec_time=0"); |
|
279 | + protected function noUnprocessedQueueEntriesForPageWithConfigurationHashExist($uid, $configurationHash) { |
|
280 | + $configurationHash = $this->db->fullQuoteStr($configurationHash, 'tx_crawler_queue'); |
|
281 | + $res = $this->db->exec_SELECTquery('count(*) as anz', 'tx_crawler_queue', "page_id=".intval($uid)." AND configuration_hash=".$configurationHash." AND exec_time=0"); |
|
282 | 282 | $row = $this->db->sql_fetch_assoc($res); |
283 | 283 | |
284 | 284 | return ($row['anz'] == 0); |
@@ -343,26 +343,26 @@ discard block |
||
343 | 343 | } |
344 | 344 | } |
345 | 345 | |
346 | - if (is_array($vv['URLs'])) { |
|
347 | - $configurationHash = md5(serialize($vv)); |
|
348 | - $skipInnerCheck = $this->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageRow['uid'],$configurationHash); |
|
346 | + if (is_array($vv['URLs'])) { |
|
347 | + $configurationHash = md5(serialize($vv)); |
|
348 | + $skipInnerCheck = $this->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageRow['uid'], $configurationHash); |
|
349 | 349 | |
350 | - foreach($vv['URLs'] as $urlQuery) { |
|
350 | + foreach ($vv['URLs'] as $urlQuery) { |
|
351 | 351 | |
352 | - if ($this->drawURLs_PIfilter($vv['subCfg']['procInstrFilter'], $incomingProcInstructions)) { |
|
352 | + if ($this->drawURLs_PIfilter($vv['subCfg']['procInstrFilter'], $incomingProcInstructions)) { |
|
353 | 353 | |
354 | 354 | // Calculate cHash: |
355 | - if ($vv['subCfg']['cHash']) { |
|
355 | + if ($vv['subCfg']['cHash']) { |
|
356 | 356 | /* @var $cacheHash \TYPO3\CMS\Frontend\Page\CacheHashCalculator */ |
357 | 357 | $cacheHash = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Frontend\Page\CacheHashCalculator'); |
358 | - $urlQuery .= '&cHash=' . $cacheHash->generateForParameters($urlQuery); |
|
358 | + $urlQuery .= '&cHash='.$cacheHash->generateForParameters($urlQuery); |
|
359 | 359 | } |
360 | 360 | |
361 | 361 | // Create key by which to determine unique-ness: |
362 | 362 | $uKey = $urlQuery.'|'.$vv['subCfg']['userGroups'].'|'.$vv['subCfg']['baseUrl'].'|'.$vv['subCfg']['procInstrFilter']; |
363 | 363 | |
364 | 364 | // realurl support (thanks to Ingo Renner) |
365 | - $urlQuery = 'index.php' . $urlQuery; |
|
365 | + $urlQuery = 'index.php'.$urlQuery; |
|
366 | 366 | if (\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded('realurl') && $vv['subCfg']['realurl']) { |
367 | 367 | $params = array( |
368 | 368 | 'LD' => array( |
@@ -375,8 +375,8 @@ discard block |
||
375 | 375 | } |
376 | 376 | |
377 | 377 | // Scheduled time: |
378 | - $schTime = $scheduledTime + round(count($duplicateTrack)*(60/$reqMinute)); |
|
379 | - $schTime = floor($schTime/60)*60; |
|
378 | + $schTime = $scheduledTime + round(count($duplicateTrack) * (60 / $reqMinute)); |
|
379 | + $schTime = floor($schTime / 60) * 60; |
|
380 | 380 | |
381 | 381 | if (isset($duplicateTrack[$uKey])) { |
382 | 382 | |
@@ -388,10 +388,10 @@ discard block |
||
388 | 388 | $urlList = '['.date('d.m.y H:i', $schTime).'] '.htmlspecialchars($urlQuery); |
389 | 389 | $this->urlList[] = '['.date('d.m.y H:i', $schTime).'] '.$urlQuery; |
390 | 390 | |
391 | - $theUrl = ($vv['subCfg']['baseUrl'] ? $vv['subCfg']['baseUrl'] : \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_URL')) . $urlQuery; |
|
391 | + $theUrl = ($vv['subCfg']['baseUrl'] ? $vv['subCfg']['baseUrl'] : \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_URL')).$urlQuery; |
|
392 | 392 | |
393 | 393 | // Submit for crawling! |
394 | - if ($submitCrawlUrls) { |
|
394 | + if ($submitCrawlUrls) { |
|
395 | 395 | $added = $this->addUrl( |
396 | 396 | $pageRow['uid'], |
397 | 397 | $theUrl, |
@@ -403,7 +403,7 @@ discard block |
||
403 | 403 | if ($added === false) { |
404 | 404 | $urlList .= ' (Url already existed)'; |
405 | 405 | } |
406 | - } elseif ($downloadCrawlUrls) { |
|
406 | + } elseif ($downloadCrawlUrls) { |
|
407 | 407 | $downloadUrls[$theUrl] = $theUrl; |
408 | 408 | } |
409 | 409 | |
@@ -431,7 +431,7 @@ discard block |
||
431 | 431 | return TRUE; |
432 | 432 | } |
433 | 433 | |
434 | - foreach($incomingProcInstructions as $pi) { |
|
434 | + foreach ($incomingProcInstructions as $pi) { |
|
435 | 435 | if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList($piString, $pi)) { |
436 | 436 | return TRUE; |
437 | 437 | } |
@@ -440,7 +440,7 @@ discard block |
||
440 | 440 | |
441 | 441 | |
442 | 442 | public function getPageTSconfigForId($id) { |
443 | - if(!$this->MP){ |
|
443 | + if (!$this->MP) { |
|
444 | 444 | $pageTSconfig = \TYPO3\CMS\Backend\Utility\BackendUtility::getPagesTSconfig($id); |
445 | 445 | } else { |
446 | 446 | list(,$mountPointId) = explode('-', $this->MP); |
@@ -480,24 +480,24 @@ discard block |
||
480 | 480 | |
481 | 481 | $res = array(); |
482 | 482 | |
483 | - if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.'])) { |
|
483 | + if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.'])) { |
|
484 | 484 | $crawlerCfg = $pageTSconfig['tx_crawler.']['crawlerCfg.']; |
485 | 485 | |
486 | - if (is_array($crawlerCfg['paramSets.'])) { |
|
487 | - foreach($crawlerCfg['paramSets.'] as $key => $values) { |
|
488 | - if (!is_array($values)) { |
|
486 | + if (is_array($crawlerCfg['paramSets.'])) { |
|
487 | + foreach ($crawlerCfg['paramSets.'] as $key => $values) { |
|
488 | + if (!is_array($values)) { |
|
489 | 489 | |
490 | 490 | // Sub configuration for a single configuration string: |
491 | - $subCfg = (array)$crawlerCfg['paramSets.'][$key.'.']; |
|
491 | + $subCfg = (array) $crawlerCfg['paramSets.'][$key.'.']; |
|
492 | 492 | $subCfg['key'] = $key; |
493 | 493 | |
494 | - if (strcmp($subCfg['procInstrFilter'],'')) { |
|
495 | - $subCfg['procInstrFilter'] = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['procInstrFilter'])); |
|
494 | + if (strcmp($subCfg['procInstrFilter'], '')) { |
|
495 | + $subCfg['procInstrFilter'] = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['procInstrFilter'])); |
|
496 | 496 | } |
497 | - $pidOnlyList = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['pidsOnly'],1)); |
|
497 | + $pidOnlyList = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['pidsOnly'], 1)); |
|
498 | 498 | |
499 | 499 | // process configuration if it is not page-specific or if the specific page is the current page: |
500 | - if (!strcmp($subCfg['pidsOnly'],'') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList,$id)) { |
|
500 | + if (!strcmp($subCfg['pidsOnly'], '') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList, $id)) { |
|
501 | 501 | |
502 | 502 | // add trailing slash if not present |
503 | 503 | if (!empty($subCfg['baseUrl']) && substr($subCfg['baseUrl'], -1) != '/') { |
@@ -508,14 +508,14 @@ discard block |
||
508 | 508 | $res[$key] = array(); |
509 | 509 | $res[$key]['subCfg'] = $subCfg; |
510 | 510 | $res[$key]['paramParsed'] = $this->parseParams($values); |
511 | - $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'],$id); |
|
511 | + $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id); |
|
512 | 512 | $res[$key]['origin'] = 'pagets'; |
513 | 513 | |
514 | 514 | // recognize MP value |
515 | - if(!$this->MP){ |
|
516 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'],array('?id='.$id)); |
|
515 | + if (!$this->MP) { |
|
516 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id)); |
|
517 | 517 | } else { |
518 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'],array('?id='.$id.'&MP='.$this->MP)); |
|
518 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id.'&MP='.$this->MP)); |
|
519 | 519 | } |
520 | 520 | } |
521 | 521 | } |
@@ -536,7 +536,7 @@ discard block |
||
536 | 536 | 'tx_crawler_configuration', |
537 | 537 | 'pid', |
538 | 538 | intval($page['uid']), |
539 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration') . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration') |
|
539 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration').\TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration') |
|
540 | 540 | ); |
541 | 541 | |
542 | 542 | if (is_array($configurationRecordsForCurrentPage)) { |
@@ -545,10 +545,10 @@ discard block |
||
545 | 545 | // check access to the configuration record |
546 | 546 | if (empty($configurationRecord['begroups']) || $GLOBALS['BE_USER']->isAdmin() || $this->hasGroupAccess($GLOBALS['BE_USER']->user['usergroup_cached_list'], $configurationRecord['begroups'])) { |
547 | 547 | |
548 | - $pidOnlyList = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$configurationRecord['pidsonly'],1)); |
|
548 | + $pidOnlyList = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $configurationRecord['pidsonly'], 1)); |
|
549 | 549 | |
550 | 550 | // process configuration if it is not page-specific or if the specific page is the current page: |
551 | - if (!strcmp($configurationRecord['pidsonly'],'') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList,$id)) { |
|
551 | + if (!strcmp($configurationRecord['pidsonly'], '') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList, $id)) { |
|
552 | 552 | $key = $configurationRecord['name']; |
553 | 553 | |
554 | 554 | // don't overwrite previously defined paramSets |
@@ -582,7 +582,7 @@ discard block |
||
582 | 582 | $res[$key]['subCfg'] = $subCfg; |
583 | 583 | $res[$key]['paramParsed'] = $this->parseParams($configurationRecord['configuration']); |
584 | 584 | $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id); |
585 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id=' . $id)); |
|
585 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id)); |
|
586 | 586 | $res[$key]['origin'] = 'tx_crawler_configuration_'.$configurationRecord['uid']; |
587 | 587 | } |
588 | 588 | } |
@@ -592,8 +592,8 @@ discard block |
||
592 | 592 | } |
593 | 593 | } |
594 | 594 | |
595 | - if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'])) { |
|
596 | - foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] as $func) { |
|
595 | + if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'])) { |
|
596 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] as $func) { |
|
597 | 597 | $params = array( |
598 | 598 | 'res' => &$res, |
599 | 599 | ); |
@@ -620,13 +620,13 @@ discard block |
||
620 | 620 | $res = $this->db->exec_SELECTquery( |
621 | 621 | '*', |
622 | 622 | 'sys_domain', |
623 | - 'uid = '.$sysDomainUid . |
|
624 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('sys_domain') . |
|
623 | + 'uid = '.$sysDomainUid. |
|
624 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('sys_domain'). |
|
625 | 625 | \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('sys_domain') |
626 | 626 | ); |
627 | 627 | $row = $this->db->sql_fetch_assoc($res); |
628 | 628 | if ($row['domainName'] != '') { |
629 | - return $urlScheme .'://'. $row['domainName']; |
|
629 | + return $urlScheme.'://'.$row['domainName']; |
|
630 | 630 | } |
631 | 631 | } |
632 | 632 | return $baseUrl; |
@@ -637,28 +637,28 @@ discard block |
||
637 | 637 | $configurationsForBranch = array(); |
638 | 638 | |
639 | 639 | $pageTSconfig = $this->getPageTSconfigForId($rootid); |
640 | - if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'])) { |
|
640 | + if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'])) { |
|
641 | 641 | |
642 | 642 | $sets = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.']; |
643 | - if(is_array($sets)) { |
|
644 | - foreach($sets as $key=>$value) { |
|
645 | - if(!is_array($value)) continue; |
|
646 | - $configurationsForBranch[] = substr($key,-1)=='.'?substr($key,0,-1):$key; |
|
643 | + if (is_array($sets)) { |
|
644 | + foreach ($sets as $key=>$value) { |
|
645 | + if (!is_array($value)) continue; |
|
646 | + $configurationsForBranch[] = substr($key, -1) == '.' ?substr($key, 0, -1) : $key; |
|
647 | 647 | } |
648 | 648 | |
649 | 649 | } |
650 | 650 | } |
651 | 651 | $pids = array(); |
652 | 652 | $rootLine = \TYPO3\CMS\Backend\Utility\BackendUtility::BEgetRootLine($rootid); |
653 | - foreach($rootLine as $node) { |
|
653 | + foreach ($rootLine as $node) { |
|
654 | 654 | $pids[] = $node['uid']; |
655 | 655 | } |
656 | 656 | /* @var \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
657 | 657 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
658 | 658 | $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1); |
659 | - $tree->init('AND ' . $perms_clause); |
|
659 | + $tree->init('AND '.$perms_clause); |
|
660 | 660 | $tree->getTree($rootid, $depth, ''); |
661 | - foreach($tree->tree as $node) { |
|
661 | + foreach ($tree->tree as $node) { |
|
662 | 662 | $pids[] = $node['row']['uid']; |
663 | 663 | } |
664 | 664 | |
@@ -666,12 +666,12 @@ discard block |
||
666 | 666 | '*', |
667 | 667 | 'tx_crawler_configuration', |
668 | 668 | 'pid IN ('.implode(',', $pids).') '. |
669 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration') . |
|
669 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration'). |
|
670 | 670 | \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration').' '. |
671 | 671 | \TYPO3\CMS\Backend\Utility\BackendUtility::versioningPlaceholderClause('tx_crawler_configuration').' ' |
672 | 672 | ); |
673 | 673 | |
674 | - while($row = $this->db->sql_fetch_assoc($res)) { |
|
674 | + while ($row = $this->db->sql_fetch_assoc($res)) { |
|
675 | 675 | $configurationsForBranch[] = $row['name']; |
676 | 676 | } |
677 | 677 | $this->db->sql_free_result($res); |
@@ -693,7 +693,7 @@ discard block |
||
693 | 693 | if (empty($accessList)) { |
694 | 694 | return true; |
695 | 695 | } |
696 | - foreach(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $groupList) as $groupUid) { |
|
696 | + foreach (\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $groupList) as $groupUid) { |
|
697 | 697 | if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList($accessList, $groupUid)) { |
698 | 698 | return true; |
699 | 699 | } |
@@ -712,9 +712,9 @@ discard block |
||
712 | 712 | $paramKeyValues = array(); |
713 | 713 | $GETparams = explode('&', $inputQuery); |
714 | 714 | |
715 | - foreach($GETparams as $paramAndValue) { |
|
716 | - list($p,$v) = explode('=', $paramAndValue, 2); |
|
717 | - if (strlen($p)) { |
|
715 | + foreach ($GETparams as $paramAndValue) { |
|
716 | + list($p, $v) = explode('=', $paramAndValue, 2); |
|
717 | + if (strlen($p)) { |
|
718 | 718 | $paramKeyValues[rawurldecode($p)] = rawurldecode($v); |
719 | 719 | } |
720 | 720 | } |
@@ -737,84 +737,84 @@ discard block |
||
737 | 737 | * @param integer Current page ID |
738 | 738 | * @return array Array with key (GET var name) with the value being an array of all possible values for that key. |
739 | 739 | */ |
740 | - function expandParameters($paramArray, $pid) { |
|
740 | + function expandParameters($paramArray, $pid) { |
|
741 | 741 | global $TCA; |
742 | 742 | |
743 | 743 | // Traverse parameter names: |
744 | - foreach($paramArray as $p => $v) { |
|
744 | + foreach ($paramArray as $p => $v) { |
|
745 | 745 | $v = trim($v); |
746 | 746 | |
747 | 747 | // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal |
748 | - if (substr($v,0,1)==='[' && substr($v,-1)===']') { |
|
748 | + if (substr($v, 0, 1) === '[' && substr($v, -1) === ']') { |
|
749 | 749 | // So, find the value inside brackets and reset the paramArray value as an array. |
750 | - $v = substr($v,1,-1); |
|
750 | + $v = substr($v, 1, -1); |
|
751 | 751 | $paramArray[$p] = array(); |
752 | 752 | |
753 | 753 | // Explode parts and traverse them: |
754 | - $parts = explode('|',$v); |
|
755 | - foreach($parts as $pV) { |
|
754 | + $parts = explode('|', $v); |
|
755 | + foreach ($parts as $pV) { |
|
756 | 756 | |
757 | 757 | // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30) |
758 | - if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/',trim($pV),$reg)) { // Integer range: |
|
758 | + if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($pV), $reg)) { // Integer range: |
|
759 | 759 | |
760 | 760 | // Swap if first is larger than last: |
761 | - if ($reg[1] > $reg[2]) { |
|
761 | + if ($reg[1] > $reg[2]) { |
|
762 | 762 | $temp = $reg[2]; |
763 | 763 | $reg[2] = $reg[1]; |
764 | 764 | $reg[1] = $temp; |
765 | 765 | } |
766 | 766 | |
767 | 767 | // Traverse range, add values: |
768 | - $runAwayBrake = 1000; // Limit to size of range! |
|
769 | - for($a=$reg[1]; $a<=$reg[2];$a++) { |
|
768 | + $runAwayBrake = 1000; // Limit to size of range! |
|
769 | + for ($a = $reg[1]; $a <= $reg[2]; $a++) { |
|
770 | 770 | $paramArray[$p][] = $a; |
771 | 771 | $runAwayBrake--; |
772 | - if ($runAwayBrake<=0) { |
|
772 | + if ($runAwayBrake <= 0) { |
|
773 | 773 | break; |
774 | 774 | } |
775 | 775 | } |
776 | - } elseif (substr(trim($pV),0,7)=='_TABLE:') { |
|
776 | + } elseif (substr(trim($pV), 0, 7) == '_TABLE:') { |
|
777 | 777 | |
778 | 778 | // Parse parameters: |
779 | - $subparts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(';',$pV); |
|
779 | + $subparts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(';', $pV); |
|
780 | 780 | $subpartParams = array(); |
781 | - foreach($subparts as $spV) { |
|
782 | - list($pKey,$pVal) = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(':',$spV); |
|
781 | + foreach ($subparts as $spV) { |
|
782 | + list($pKey, $pVal) = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(':', $spV); |
|
783 | 783 | $subpartParams[$pKey] = $pVal; |
784 | 784 | } |
785 | 785 | |
786 | 786 | // Table exists: |
787 | - if (isset($TCA[$subpartParams['_TABLE']])) { |
|
787 | + if (isset($TCA[$subpartParams['_TABLE']])) { |
|
788 | 788 | $lookUpPid = isset($subpartParams['_PID']) ? intval($subpartParams['_PID']) : $pid; |
789 | 789 | $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid'; |
790 | 790 | $where = isset($subpartParams['_WHERE']) ? $subpartParams['_WHERE'] : ''; |
791 | 791 | $addTable = isset($subpartParams['_ADDTABLE']) ? $subpartParams['_ADDTABLE'] : ''; |
792 | 792 | |
793 | 793 | $fieldName = $subpartParams['_FIELD'] ? $subpartParams['_FIELD'] : 'uid'; |
794 | - if ($fieldName==='uid' || $TCA[$subpartParams['_TABLE']]['columns'][$fieldName]) { |
|
794 | + if ($fieldName === 'uid' || $TCA[$subpartParams['_TABLE']]['columns'][$fieldName]) { |
|
795 | 795 | |
796 | 796 | $andWhereLanguage = ''; |
797 | 797 | $transOrigPointerField = $TCA[$subpartParams['_TABLE']]['ctrl']['transOrigPointerField']; |
798 | 798 | |
799 | 799 | if ($subpartParams['_ENABLELANG'] && $transOrigPointerField) { |
800 | - $andWhereLanguage = ' AND ' . $this->db->quoteStr($transOrigPointerField, $subpartParams['_TABLE']) .' <= 0 '; |
|
800 | + $andWhereLanguage = ' AND '.$this->db->quoteStr($transOrigPointerField, $subpartParams['_TABLE']).' <= 0 '; |
|
801 | 801 | } |
802 | 802 | |
803 | - $where = $this->db->quoteStr($pidField, $subpartParams['_TABLE']) .'='.intval($lookUpPid) . ' ' . |
|
804 | - $andWhereLanguage . $where; |
|
803 | + $where = $this->db->quoteStr($pidField, $subpartParams['_TABLE']).'='.intval($lookUpPid).' '. |
|
804 | + $andWhereLanguage.$where; |
|
805 | 805 | |
806 | 806 | $rows = $this->db->exec_SELECTgetRows( |
807 | 807 | $fieldName, |
808 | - $subpartParams['_TABLE'] . $addTable, |
|
809 | - $where . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($subpartParams['_TABLE']), |
|
808 | + $subpartParams['_TABLE'].$addTable, |
|
809 | + $where.\TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($subpartParams['_TABLE']), |
|
810 | 810 | '', |
811 | 811 | '', |
812 | 812 | '', |
813 | 813 | $fieldName |
814 | 814 | ); |
815 | 815 | |
816 | - if (is_array($rows)) { |
|
817 | - $paramArray[$p] = array_merge($paramArray[$p],array_keys($rows)); |
|
816 | + if (is_array($rows)) { |
|
817 | + $paramArray[$p] = array_merge($paramArray[$p], array_keys($rows)); |
|
818 | 818 | } |
819 | 819 | } |
820 | 820 | } |
@@ -830,7 +830,7 @@ discard block |
||
830 | 830 | 'currentValue' => $pV, |
831 | 831 | 'pid' => $pid |
832 | 832 | ); |
833 | - foreach($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] as $key => $_funcRef) { |
|
833 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] as $key => $_funcRef) { |
|
834 | 834 | \TYPO3\CMS\Core\Utility\GeneralUtility::callUserFunction($_funcRef, $_params, $this); |
835 | 835 | } |
836 | 836 | } |
@@ -866,11 +866,11 @@ discard block |
||
866 | 866 | |
867 | 867 | // Traverse value set: |
868 | 868 | $newUrls = array(); |
869 | - foreach($urls as $url) { |
|
870 | - foreach($valueSet as $val) { |
|
871 | - $newUrls[] = $url.(strcmp($val,'') ? '&'.rawurlencode($varName).'='.rawurlencode($val) : ''); |
|
869 | + foreach ($urls as $url) { |
|
870 | + foreach ($valueSet as $val) { |
|
871 | + $newUrls[] = $url.(strcmp($val, '') ? '&'.rawurlencode($varName).'='.rawurlencode($val) : ''); |
|
872 | 872 | |
873 | - if (count($newUrls) > \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000)) { |
|
873 | + if (count($newUrls) > \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000)) { |
|
874 | 874 | break; |
875 | 875 | } |
876 | 876 | } |
@@ -900,7 +900,7 @@ discard block |
||
900 | 900 | */ |
901 | 901 | public function getLogEntriesForPageId($id, $filter = '', $doFlush = FALSE, $doFullFlush = FALSE, $itemsPerPage = 10) { |
902 | 902 | // FIXME: Write Unit tests for Filters |
903 | - switch($filter) { |
|
903 | + switch ($filter) { |
|
904 | 904 | case 'pending': |
905 | 905 | $addWhere = ' AND exec_time=0'; |
906 | 906 | break; |
@@ -914,13 +914,13 @@ discard block |
||
914 | 914 | |
915 | 915 | // FIXME: Write unit test that ensures that the right records are deleted. |
916 | 916 | if ($doFlush) { |
917 | - $this->flushQueue( ($doFullFlush?'1=1':('page_id='.intval($id))) .$addWhere); |
|
917 | + $this->flushQueue(($doFullFlush ? '1=1' : ('page_id='.intval($id))).$addWhere); |
|
918 | 918 | return array(); |
919 | 919 | } else { |
920 | 920 | return $this->db->exec_SELECTgetRows('*', |
921 | 921 | 'tx_crawler_queue', |
922 | - 'page_id=' . intval($id) . $addWhere, '', 'scheduled DESC', |
|
923 | - (intval($itemsPerPage)>0 ? intval($itemsPerPage) : '')); |
|
922 | + 'page_id='.intval($id).$addWhere, '', 'scheduled DESC', |
|
923 | + (intval($itemsPerPage) > 0 ? intval($itemsPerPage) : '')); |
|
924 | 924 | } |
925 | 925 | } |
926 | 926 | |
@@ -933,9 +933,9 @@ discard block |
||
933 | 933 | * @param integer Limit the amount of entires per page default is 10 |
934 | 934 | * @return array |
935 | 935 | */ |
936 | - public function getLogEntriesForSetId($set_id,$filter='',$doFlush=FALSE, $doFullFlush=FALSE, $itemsPerPage=10) { |
|
936 | + public function getLogEntriesForSetId($set_id, $filter = '', $doFlush = FALSE, $doFullFlush = FALSE, $itemsPerPage = 10) { |
|
937 | 937 | // FIXME: Write Unit tests for Filters |
938 | - switch($filter) { |
|
938 | + switch ($filter) { |
|
939 | 939 | case 'pending': |
940 | 940 | $addWhere = ' AND exec_time=0'; |
941 | 941 | break; |
@@ -947,14 +947,14 @@ discard block |
||
947 | 947 | break; |
948 | 948 | } |
949 | 949 | // FIXME: Write unit test that ensures that the right records are deleted. |
950 | - if ($doFlush) { |
|
951 | - $this->flushQueue($doFullFlush?'':('set_id='.intval($set_id).$addWhere)); |
|
950 | + if ($doFlush) { |
|
951 | + $this->flushQueue($doFullFlush ? '' : ('set_id='.intval($set_id).$addWhere)); |
|
952 | 952 | return array(); |
953 | 953 | } else { |
954 | 954 | return $this->db->exec_SELECTgetRows('*', |
955 | 955 | 'tx_crawler_queue', |
956 | - 'set_id='.intval($set_id).$addWhere,'','scheduled DESC', |
|
957 | - (intval($itemsPerPage)>0 ? intval($itemsPerPage) : '')); |
|
956 | + 'set_id='.intval($set_id).$addWhere, '', 'scheduled DESC', |
|
957 | + (intval($itemsPerPage) > 0 ? intval($itemsPerPage) : '')); |
|
958 | 958 | } |
959 | 959 | } |
960 | 960 | |
@@ -964,14 +964,14 @@ discard block |
||
964 | 964 | * @param $where SQL related filter for the entries which should be removed |
965 | 965 | * @return void |
966 | 966 | */ |
967 | - protected function flushQueue($where='') { |
|
967 | + protected function flushQueue($where = '') { |
|
968 | 968 | |
969 | - $realWhere = strlen($where)>0?$where:'1=1'; |
|
969 | + $realWhere = strlen($where) > 0 ? $where : '1=1'; |
|
970 | 970 | |
971 | - if(tx_crawler_domain_events_dispatcher::getInstance()->hasObserver('queueEntryFlush')) { |
|
972 | - $groups = $this->db->exec_SELECTgetRows('DISTINCT set_id','tx_crawler_queue',$realWhere); |
|
973 | - foreach($groups as $group) { |
|
974 | - tx_crawler_domain_events_dispatcher::getInstance()->post('queueEntryFlush',$group['set_id'], $this->db->exec_SELECTgetRows('uid, set_id','tx_crawler_queue',$realWhere.' AND set_id="'.$group['set_id'].'"')); |
|
971 | + if (tx_crawler_domain_events_dispatcher::getInstance()->hasObserver('queueEntryFlush')) { |
|
972 | + $groups = $this->db->exec_SELECTgetRows('DISTINCT set_id', 'tx_crawler_queue', $realWhere); |
|
973 | + foreach ($groups as $group) { |
|
974 | + tx_crawler_domain_events_dispatcher::getInstance()->post('queueEntryFlush', $group['set_id'], $this->db->exec_SELECTgetRows('uid, set_id', 'tx_crawler_queue', $realWhere.' AND set_id="'.$group['set_id'].'"')); |
|
975 | 975 | } |
976 | 976 | } |
977 | 977 | |
@@ -988,7 +988,7 @@ discard block |
||
988 | 988 | * @param integer Time at which to activate |
989 | 989 | * @return void |
990 | 990 | */ |
991 | - function addQueueEntry_callBack($setId,$params,$callBack,$page_id=0,$schedule=0) { |
|
991 | + function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0) { |
|
992 | 992 | |
993 | 993 | if (!is_array($params)) $params = array(); |
994 | 994 | $params['_CALLBACKOBJ'] = $callBack; |
@@ -1003,7 +1003,7 @@ discard block |
||
1003 | 1003 | 'result_data' => '', |
1004 | 1004 | ); |
1005 | 1005 | |
1006 | - $this->db->exec_INSERTquery('tx_crawler_queue',$fieldArray); |
|
1006 | + $this->db->exec_INSERTquery('tx_crawler_queue', $fieldArray); |
|
1007 | 1007 | } |
1008 | 1008 | |
1009 | 1009 | |
@@ -1033,13 +1033,13 @@ discard block |
||
1033 | 1033 | * @param bool (optional) skip inner duplication check |
1034 | 1034 | * @return bool true if the url was added, false if it already existed |
1035 | 1035 | */ |
1036 | - function addUrl ( |
|
1036 | + function addUrl( |
|
1037 | 1037 | $id, |
1038 | 1038 | $url, |
1039 | 1039 | array $subCfg, |
1040 | 1040 | $tstamp, |
1041 | - $configurationHash='', |
|
1042 | - $skipInnerDuplicationCheck=false |
|
1041 | + $configurationHash = '', |
|
1042 | + $skipInnerDuplicationCheck = false |
|
1043 | 1043 | ) { |
1044 | 1044 | |
1045 | 1045 | $urlAdded = false; |
@@ -1050,14 +1050,14 @@ discard block |
||
1050 | 1050 | ); |
1051 | 1051 | |
1052 | 1052 | // fe user group simulation: |
1053 | - $uGs = implode(',',array_unique(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',',$subCfg['userGroups'],1))); |
|
1054 | - if ($uGs) { |
|
1053 | + $uGs = implode(',', array_unique(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $subCfg['userGroups'], 1))); |
|
1054 | + if ($uGs) { |
|
1055 | 1055 | $parameters['feUserGroupList'] = $uGs; |
1056 | 1056 | } |
1057 | 1057 | |
1058 | 1058 | // Setting processing instructions |
1059 | - $parameters['procInstructions'] = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['procInstrFilter']); |
|
1060 | - if (is_array($subCfg['procInstrParams.'])) { |
|
1059 | + $parameters['procInstructions'] = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']); |
|
1060 | + if (is_array($subCfg['procInstrParams.'])) { |
|
1061 | 1061 | $parameters['procInstrParams'] = $subCfg['procInstrParams.']; |
1062 | 1062 | } |
1063 | 1063 | |
@@ -1076,14 +1076,14 @@ discard block |
||
1076 | 1076 | 'configuration' => $subCfg['key'], |
1077 | 1077 | ); |
1078 | 1078 | |
1079 | - if ($this->registerQueueEntriesInternallyOnly) { |
|
1079 | + if ($this->registerQueueEntriesInternallyOnly) { |
|
1080 | 1080 | //the entries will only be registered and not stored to the database |
1081 | 1081 | $this->queueEntries[] = $fieldArray; |
1082 | 1082 | } else { |
1083 | 1083 | |
1084 | - if(!$skipInnerDuplicationCheck){ |
|
1084 | + if (!$skipInnerDuplicationCheck) { |
|
1085 | 1085 | // check if there is already an equal entry |
1086 | - $rows = $this->getDuplicateRowsIfExist($tstamp,$fieldArray); |
|
1086 | + $rows = $this->getDuplicateRowsIfExist($tstamp, $fieldArray); |
|
1087 | 1087 | } |
1088 | 1088 | |
1089 | 1089 | if (count($rows) == 0) { |
@@ -1091,9 +1091,9 @@ discard block |
||
1091 | 1091 | $uid = $this->db->sql_insert_id(); |
1092 | 1092 | $rows[] = $uid; |
1093 | 1093 | $urlAdded = true; |
1094 | - tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue',$this->setID,array('uid' => $uid, 'fieldArray' => $fieldArray)); |
|
1095 | - }else{ |
|
1096 | - tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue',$this->setID,array('rows' => $rows, 'fieldArray' => $fieldArray)); |
|
1094 | + tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue', $this->setID, array('uid' => $uid, 'fieldArray' => $fieldArray)); |
|
1095 | + } else { |
|
1096 | + tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue', $this->setID, array('rows' => $rows, 'fieldArray' => $fieldArray)); |
|
1097 | 1097 | } |
1098 | 1098 | } |
1099 | 1099 | |
@@ -1111,34 +1111,34 @@ discard block |
||
1111 | 1111 | * @author Timo Schmidt |
1112 | 1112 | * @return array; |
1113 | 1113 | */ |
1114 | - protected function getDuplicateRowsIfExist($tstamp,$fieldArray){ |
|
1114 | + protected function getDuplicateRowsIfExist($tstamp, $fieldArray) { |
|
1115 | 1115 | $rows = array(); |
1116 | 1116 | |
1117 | 1117 | $currentTime = $this->getCurrentTime(); |
1118 | 1118 | |
1119 | 1119 | //if this entry is scheduled with "now" |
1120 | 1120 | if ($tstamp <= $currentTime) { |
1121 | - if($this->extensionSettings['enableTimeslot']){ |
|
1121 | + if ($this->extensionSettings['enableTimeslot']) { |
|
1122 | 1122 | $timeBegin = $currentTime - 100; |
1123 | - $timeEnd = $currentTime + 100; |
|
1124 | - $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '. $currentTime.') '; |
|
1125 | - }else{ |
|
1126 | - $where = 'scheduled <= ' . $currentTime; |
|
1123 | + $timeEnd = $currentTime + 100; |
|
1124 | + $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '.$currentTime.') '; |
|
1125 | + } else { |
|
1126 | + $where = 'scheduled <= '.$currentTime; |
|
1127 | 1127 | } |
1128 | 1128 | } elseif ($tstamp > $currentTime) { |
1129 | 1129 | //entry with a timestamp in the future need to have the same schedule time |
1130 | - $where = 'scheduled = ' . $tstamp ; |
|
1130 | + $where = 'scheduled = '.$tstamp; |
|
1131 | 1131 | } |
1132 | 1132 | |
1133 | - if(!empty($where)){ |
|
1133 | + if (!empty($where)) { |
|
1134 | 1134 | $result = $this->db->exec_SELECTgetRows( |
1135 | 1135 | 'qid', |
1136 | 1136 | 'tx_crawler_queue', |
1137 | 1137 | $where. |
1138 | - ' AND NOT exec_time' . |
|
1138 | + ' AND NOT exec_time'. |
|
1139 | 1139 | ' AND NOT process_id '. |
1140 | 1140 | ' AND page_id='.intval($fieldArray['page_id']). |
1141 | - ' AND parameters_hash = ' . $this->db->fullQuoteStr($fieldArray['parameters_hash'], 'tx_crawler_queue') |
|
1141 | + ' AND parameters_hash = '.$this->db->fullQuoteStr($fieldArray['parameters_hash'], 'tx_crawler_queue') |
|
1142 | 1142 | ); |
1143 | 1143 | |
1144 | 1144 | if (is_array($result)) { |
@@ -1158,7 +1158,7 @@ discard block |
||
1158 | 1158 | * @author Timo Schmidt <[email protected]> |
1159 | 1159 | * @return int |
1160 | 1160 | */ |
1161 | - public function getCurrentTime(){ |
|
1161 | + public function getCurrentTime() { |
|
1162 | 1162 | return time(); |
1163 | 1163 | } |
1164 | 1164 | |
@@ -1180,18 +1180,18 @@ discard block |
||
1180 | 1180 | function readUrl($queueId, $force = FALSE) { |
1181 | 1181 | $ret = 0; |
1182 | 1182 | if ($this->debugMode) { |
1183 | - \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl start ' . microtime(true), __FUNCTION__); |
|
1183 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl start '.microtime(true), __FUNCTION__); |
|
1184 | 1184 | } |
1185 | 1185 | // Get entry: |
1186 | 1186 | list($queueRec) = $this->db->exec_SELECTgetRows('*', 'tx_crawler_queue', |
1187 | - 'qid=' . intval($queueId) . ($force ? '' : ' AND exec_time=0 AND process_scheduled > 0')); |
|
1187 | + 'qid='.intval($queueId).($force ? '' : ' AND exec_time=0 AND process_scheduled > 0')); |
|
1188 | 1188 | |
1189 | 1189 | if (!is_array($queueRec)) { |
1190 | 1190 | return; |
1191 | 1191 | } |
1192 | 1192 | |
1193 | - $pageUidRootTypoScript = \AOE\Crawler\Utility\TypoScriptUtility::getPageUidForTypoScriptRootTemplateInRootLine((int)$queueRec['page_id']); |
|
1194 | - $this->initTSFE((int)$pageUidRootTypoScript); |
|
1193 | + $pageUidRootTypoScript = \AOE\Crawler\Utility\TypoScriptUtility::getPageUidForTypoScriptRootTemplateInRootLine((int) $queueRec['page_id']); |
|
1194 | + $this->initTSFE((int) $pageUidRootTypoScript); |
|
1195 | 1195 | |
1196 | 1196 | \AOE\Crawler\Utility\SignalSlotUtility::emitSignal( |
1197 | 1197 | __CLASS__, |
@@ -1206,7 +1206,7 @@ discard block |
||
1206 | 1206 | //if mulitprocessing is used we need to store the id of the process which has handled this entry |
1207 | 1207 | $field_array['process_id_completed'] = $this->processID; |
1208 | 1208 | } |
1209 | - $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array); |
|
1209 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
1210 | 1210 | |
1211 | 1211 | $result = $this->readUrl_exec($queueRec); |
1212 | 1212 | $resultData = unserialize($result['content']); |
@@ -1235,11 +1235,11 @@ discard block |
||
1235 | 1235 | array($queueId, &$field_array) |
1236 | 1236 | ); |
1237 | 1237 | |
1238 | - $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array); |
|
1238 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
1239 | 1239 | |
1240 | 1240 | |
1241 | 1241 | if ($this->debugMode) { |
1242 | - \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl stop ' . microtime(true), __FUNCTION__); |
|
1242 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl stop '.microtime(true), __FUNCTION__); |
|
1243 | 1243 | } |
1244 | 1244 | |
1245 | 1245 | return $ret; |
@@ -1251,7 +1251,7 @@ discard block |
||
1251 | 1251 | * @param integer Queue field array, |
1252 | 1252 | * @return string |
1253 | 1253 | */ |
1254 | - function readUrlFromArray($field_array) { |
|
1254 | + function readUrlFromArray($field_array) { |
|
1255 | 1255 | |
1256 | 1256 | // Set exec_time to lock record: |
1257 | 1257 | $field_array['exec_time'] = $this->getCurrentTime(); |
@@ -1262,7 +1262,7 @@ discard block |
||
1262 | 1262 | |
1263 | 1263 | // Set result in log which also denotes the end of the processing of this entry. |
1264 | 1264 | $field_array = array('result_data' => serialize($result)); |
1265 | - $this->db->exec_UPDATEquery('tx_crawler_queue','qid='.intval($queueId), $field_array); |
|
1265 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
1266 | 1266 | |
1267 | 1267 | return $result; |
1268 | 1268 | } |
@@ -1273,17 +1273,17 @@ discard block |
||
1273 | 1273 | * @param array Queue record |
1274 | 1274 | * @return string Result output. |
1275 | 1275 | */ |
1276 | - function readUrl_exec($queueRec) { |
|
1276 | + function readUrl_exec($queueRec) { |
|
1277 | 1277 | // Decode parameters: |
1278 | 1278 | $parameters = unserialize($queueRec['parameters']); |
1279 | 1279 | $result = 'ERROR'; |
1280 | - if (is_array($parameters)) { |
|
1281 | - if ($parameters['_CALLBACKOBJ']) { // Calling object: |
|
1280 | + if (is_array($parameters)) { |
|
1281 | + if ($parameters['_CALLBACKOBJ']) { // Calling object: |
|
1282 | 1282 | $objRef = $parameters['_CALLBACKOBJ']; |
1283 | 1283 | $callBackObj = &\TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($objRef); |
1284 | - if (is_object($callBackObj)) { |
|
1284 | + if (is_object($callBackObj)) { |
|
1285 | 1285 | unset($parameters['_CALLBACKOBJ']); |
1286 | - $result = array('content' => serialize($callBackObj->crawler_execute($parameters,$this))); |
|
1286 | + $result = array('content' => serialize($callBackObj->crawler_execute($parameters, $this))); |
|
1287 | 1287 | } else { |
1288 | 1288 | $result = array('content' => 'No object: '.$objRef); |
1289 | 1289 | } |
@@ -1293,9 +1293,9 @@ discard block |
||
1293 | 1293 | $crawlerId = $queueRec['qid'].':'.md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey']); |
1294 | 1294 | |
1295 | 1295 | // Get result: |
1296 | - $result = $this->requestUrl($parameters['url'],$crawlerId); |
|
1296 | + $result = $this->requestUrl($parameters['url'], $crawlerId); |
|
1297 | 1297 | |
1298 | - tx_crawler_domain_events_dispatcher::getInstance()->post('urlCrawled',$queueRec['set_id'],array('url' => $parameters['url'], 'result' => $result)); |
|
1298 | + tx_crawler_domain_events_dispatcher::getInstance()->post('urlCrawled', $queueRec['set_id'], array('url' => $parameters['url'], 'result' => $result)); |
|
1299 | 1299 | } |
1300 | 1300 | } |
1301 | 1301 | |
@@ -1312,7 +1312,7 @@ discard block |
||
1312 | 1312 | * @param integer $recursion Recursion limiter for 302 redirects |
1313 | 1313 | * @return array Array with content |
1314 | 1314 | */ |
1315 | - function requestUrl($originalUrl, $crawlerId, $timeout=2, $recursion=10) { |
|
1315 | + function requestUrl($originalUrl, $crawlerId, $timeout = 2, $recursion = 10) { |
|
1316 | 1316 | |
1317 | 1317 | if (!$recursion) return false; |
1318 | 1318 | |
@@ -1324,7 +1324,7 @@ discard block |
||
1324 | 1324 | return FALSE; |
1325 | 1325 | } |
1326 | 1326 | |
1327 | - if (!in_array($url['scheme'], array('','http','https'))) { |
|
1327 | + if (!in_array($url['scheme'], array('', 'http', 'https'))) { |
|
1328 | 1328 | if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
1329 | 1329 | return FALSE; |
1330 | 1330 | } |
@@ -1342,14 +1342,14 @@ discard block |
||
1342 | 1342 | |
1343 | 1343 | if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] && $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlProxyServer']) { |
1344 | 1344 | $rurl = parse_url($GLOBALS['TYPO3_CONF_VARS']['SYS']['curlProxyServer']); |
1345 | - $url['path'] = $url['scheme'] . '://' . $url['host'] . ($url['port'] > 0 ? ':' . $url['port'] : '') . $url['path']; |
|
1345 | + $url['path'] = $url['scheme'].'://'.$url['host'].($url['port'] > 0 ? ':'.$url['port'] : '').$url['path']; |
|
1346 | 1346 | $reqHeaders = $this->buildRequestHeaderArray($url, $crawlerId); |
1347 | 1347 | } |
1348 | 1348 | |
1349 | 1349 | $host = $rurl['host']; |
1350 | 1350 | |
1351 | 1351 | if ($url['scheme'] == 'https') { |
1352 | - $host = 'ssl://' . $host; |
|
1352 | + $host = 'ssl://'.$host; |
|
1353 | 1353 | $port = ($rurl['port'] > 0) ? $rurl['port'] : 443; |
1354 | 1354 | } else { |
1355 | 1355 | $port = ($rurl['port'] > 0) ? $rurl['port'] : 80; |
@@ -1363,24 +1363,24 @@ discard block |
||
1363 | 1363 | return FALSE; |
1364 | 1364 | } else { |
1365 | 1365 | // Request message: |
1366 | - $msg = implode("\r\n",$reqHeaders)."\r\n\r\n"; |
|
1367 | - fputs ($fp, $msg); |
|
1366 | + $msg = implode("\r\n", $reqHeaders)."\r\n\r\n"; |
|
1367 | + fputs($fp, $msg); |
|
1368 | 1368 | |
1369 | 1369 | // Read response: |
1370 | 1370 | $d = $this->getHttpResponseFromStream($fp); |
1371 | - fclose ($fp); |
|
1371 | + fclose($fp); |
|
1372 | 1372 | |
1373 | 1373 | $time = microtime(true) - $startTime; |
1374 | - $this->log($originalUrl .' '.$time); |
|
1374 | + $this->log($originalUrl.' '.$time); |
|
1375 | 1375 | |
1376 | 1376 | // Implode content and headers: |
1377 | 1377 | $result = array( |
1378 | 1378 | 'request' => $msg, |
1379 | 1379 | 'headers' => implode('', $d['headers']), |
1380 | - 'content' => implode('', (array)$d['content']) |
|
1380 | + 'content' => implode('', (array) $d['content']) |
|
1381 | 1381 | ); |
1382 | 1382 | |
1383 | - if (($this->extensionSettings['follow30x']) && ($newUrl = $this->getRequestUrlFrom302Header($d['headers'],$url['user'],$url['pass']))) { |
|
1383 | + if (($this->extensionSettings['follow30x']) && ($newUrl = $this->getRequestUrlFrom302Header($d['headers'], $url['user'], $url['pass']))) { |
|
1384 | 1384 | $result = array_merge(array('parentRequest'=>$result), $this->requestUrl($newUrl, $crawlerId, $recursion--)); |
1385 | 1385 | $newRequestUrl = $this->requestUrl($newUrl, $crawlerId, $timeout, --$recursion); |
1386 | 1386 | |
@@ -1419,8 +1419,8 @@ discard block |
||
1419 | 1419 | |
1420 | 1420 | // Base path must be '/<pathSegements>/': |
1421 | 1421 | if ($frontendBasePath != '/') { |
1422 | - $frontendBasePath = '/' . ltrim($frontendBasePath, '/'); |
|
1423 | - $frontendBasePath = rtrim($frontendBasePath, '/') . '/'; |
|
1422 | + $frontendBasePath = '/'.ltrim($frontendBasePath, '/'); |
|
1423 | + $frontendBasePath = rtrim($frontendBasePath, '/').'/'; |
|
1424 | 1424 | } |
1425 | 1425 | |
1426 | 1426 | return $frontendBasePath; |
@@ -1450,7 +1450,7 @@ discard block |
||
1450 | 1450 | |
1451 | 1451 | if (is_resource($streamPointer)) { |
1452 | 1452 | // read headers |
1453 | - while($line = fgets($streamPointer, '2048')) { |
|
1453 | + while ($line = fgets($streamPointer, '2048')) { |
|
1454 | 1454 | $line = trim($line); |
1455 | 1455 | if ($line !== '') { |
1456 | 1456 | $response['headers'][] = $line; |
@@ -1460,7 +1460,7 @@ discard block |
||
1460 | 1460 | } |
1461 | 1461 | |
1462 | 1462 | // read content |
1463 | - while($line = fgets($streamPointer, '2048')) { |
|
1463 | + while ($line = fgets($streamPointer, '2048')) { |
|
1464 | 1464 | $response['content'][] = $line; |
1465 | 1465 | } |
1466 | 1466 | } |
@@ -1473,7 +1473,7 @@ discard block |
||
1473 | 1473 | */ |
1474 | 1474 | protected function log($message) { |
1475 | 1475 | if (!empty($this->extensionSettings['logFileName'])) { |
1476 | - @file_put_contents($this->extensionSettings['logFileName'], date('Ymd His') . $message . "\n", FILE_APPEND); |
|
1476 | + @file_put_contents($this->extensionSettings['logFileName'], date('Ymd His').$message."\n", FILE_APPEND); |
|
1477 | 1477 | } |
1478 | 1478 | } |
1479 | 1479 | |
@@ -1489,12 +1489,12 @@ discard block |
||
1489 | 1489 | $reqHeaders = array(); |
1490 | 1490 | $reqHeaders[] = 'GET '.$url['path'].($url['query'] ? '?'.$url['query'] : '').' HTTP/1.0'; |
1491 | 1491 | $reqHeaders[] = 'Host: '.$url['host']; |
1492 | - if (stristr($url['query'],'ADMCMD_previewWS')) { |
|
1492 | + if (stristr($url['query'], 'ADMCMD_previewWS')) { |
|
1493 | 1493 | $reqHeaders[] = 'Cookie: $Version="1"; be_typo_user="1"; $Path=/'; |
1494 | 1494 | } |
1495 | 1495 | $reqHeaders[] = 'Connection: close'; |
1496 | - if ($url['user']!='') { |
|
1497 | - $reqHeaders[] = 'Authorization: Basic '. base64_encode($url['user'].':'.$url['pass']); |
|
1496 | + if ($url['user'] != '') { |
|
1497 | + $reqHeaders[] = 'Authorization: Basic '.base64_encode($url['user'].':'.$url['pass']); |
|
1498 | 1498 | } |
1499 | 1499 | $reqHeaders[] = 'X-T3crawler: '.$crawlerId; |
1500 | 1500 | $reqHeaders[] = 'User-Agent: TYPO3 crawler'; |
@@ -1509,21 +1509,21 @@ discard block |
||
1509 | 1509 | * @param string HTTP Auth. Password |
1510 | 1510 | * @return string URL from redirection |
1511 | 1511 | */ |
1512 | - protected function getRequestUrlFrom302Header($headers,$user='',$pass='') { |
|
1513 | - if(!is_array($headers)) return false; |
|
1514 | - if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) return false; |
|
1512 | + protected function getRequestUrlFrom302Header($headers, $user = '', $pass = '') { |
|
1513 | + if (!is_array($headers)) return false; |
|
1514 | + if (!(stristr($headers[0], '301 Moved') || stristr($headers[0], '302 Found') || stristr($headers[0], '302 Moved'))) return false; |
|
1515 | 1515 | |
1516 | - foreach($headers as $hl) { |
|
1517 | - $tmp = explode(": ",$hl); |
|
1516 | + foreach ($headers as $hl) { |
|
1517 | + $tmp = explode(": ", $hl); |
|
1518 | 1518 | $header[trim($tmp[0])] = trim($tmp[1]); |
1519 | - if(trim($tmp[0])=='Location') break; |
|
1519 | + if (trim($tmp[0]) == 'Location') break; |
|
1520 | 1520 | } |
1521 | - if(!array_key_exists('Location',$header)) return false; |
|
1521 | + if (!array_key_exists('Location', $header)) return false; |
|
1522 | 1522 | |
1523 | - if($user!='') { |
|
1524 | - if(!($tmp = parse_url($header['Location']))) return false; |
|
1525 | - $newUrl = $tmp['scheme'] . '://' . $user . ':' . $pass . '@' . $tmp['host'] . $tmp['path']; |
|
1526 | - if($tmp['query']!='') $newUrl .= '?' . $tmp['query']; |
|
1523 | + if ($user != '') { |
|
1524 | + if (!($tmp = parse_url($header['Location']))) return false; |
|
1525 | + $newUrl = $tmp['scheme'].'://'.$user.':'.$pass.'@'.$tmp['host'].$tmp['path']; |
|
1526 | + if ($tmp['query'] != '') $newUrl .= '?'.$tmp['query']; |
|
1527 | 1527 | } else { |
1528 | 1528 | $newUrl = $header['Location']; |
1529 | 1529 | } |
@@ -1551,15 +1551,15 @@ discard block |
||
1551 | 1551 | * @param object TSFE object (reference under PHP5) |
1552 | 1552 | * @return void |
1553 | 1553 | */ |
1554 | - function fe_init(&$params, $ref) { |
|
1554 | + function fe_init(&$params, $ref) { |
|
1555 | 1555 | |
1556 | 1556 | // Authenticate crawler request: |
1557 | - if (isset($_SERVER['HTTP_X_T3CRAWLER'])) { |
|
1558 | - list($queueId,$hash) = explode(':', $_SERVER['HTTP_X_T3CRAWLER']); |
|
1559 | - list($queueRec) = $this->db->exec_SELECTgetRows('*','tx_crawler_queue','qid='.intval($queueId)); |
|
1557 | + if (isset($_SERVER['HTTP_X_T3CRAWLER'])) { |
|
1558 | + list($queueId, $hash) = explode(':', $_SERVER['HTTP_X_T3CRAWLER']); |
|
1559 | + list($queueRec) = $this->db->exec_SELECTgetRows('*', 'tx_crawler_queue', 'qid='.intval($queueId)); |
|
1560 | 1560 | |
1561 | 1561 | // If a crawler record was found and hash was matching, set it up: |
1562 | - if (is_array($queueRec) && $hash === md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'])) { |
|
1562 | + if (is_array($queueRec) && $hash === md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'])) { |
|
1563 | 1563 | $params['pObj']->applicationData['tx_crawler']['running'] = TRUE; |
1564 | 1564 | $params['pObj']->applicationData['tx_crawler']['parameters'] = unserialize($queueRec['parameters']); |
1565 | 1565 | $params['pObj']->applicationData['tx_crawler']['log'] = array(); |
@@ -1619,7 +1619,7 @@ discard block |
||
1619 | 1619 | /* @var $tree \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
1620 | 1620 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
1621 | 1621 | $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1); |
1622 | - $tree->init('AND ' . $perms_clause); |
|
1622 | + $tree->init('AND '.$perms_clause); |
|
1623 | 1623 | |
1624 | 1624 | $pageinfo = \TYPO3\CMS\Backend\Utility\BackendUtility::readPageAccess($id, $perms_clause); |
1625 | 1625 | if (VersionNumberUtility::convertVersionNumberToInteger(VersionNumberUtility::getCurrentTypo3Version()) < 8000000) { |
@@ -1636,7 +1636,7 @@ discard block |
||
1636 | 1636 | } |
1637 | 1637 | |
1638 | 1638 | // Get branch beneath: |
1639 | - if ($depth) { |
|
1639 | + if ($depth) { |
|
1640 | 1640 | $tree->getTree($id, $depth, ''); |
1641 | 1641 | } |
1642 | 1642 | |
@@ -1648,7 +1648,7 @@ discard block |
||
1648 | 1648 | $this->MP = false; |
1649 | 1649 | |
1650 | 1650 | // recognize mount points |
1651 | - if($data['row']['doktype'] == 7){ |
|
1651 | + if ($data['row']['doktype'] == 7) { |
|
1652 | 1652 | $mountpage = $this->db->exec_SELECTgetRows('*', 'pages', 'uid = '.$data['row']['uid']); |
1653 | 1653 | |
1654 | 1654 | // fetch mounted pages |
@@ -1658,15 +1658,15 @@ discard block |
||
1658 | 1658 | $mountTree->init('AND '.$perms_clause); |
1659 | 1659 | $mountTree->getTree($mountpage[0]['mount_pid'], $depth, ''); |
1660 | 1660 | |
1661 | - foreach($mountTree->tree as $mountData) { |
|
1661 | + foreach ($mountTree->tree as $mountData) { |
|
1662 | 1662 | $code .= $this->drawURLs_addRowsForPage( |
1663 | 1663 | $mountData['row'], |
1664 | - $mountData['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages',$mountData['row'],TRUE) |
|
1664 | + $mountData['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $mountData['row'], TRUE) |
|
1665 | 1665 | ); |
1666 | 1666 | } |
1667 | 1667 | |
1668 | 1668 | // replace page when mount_pid_ol is enabled |
1669 | - if($mountpage[0]['mount_pid_ol']){ |
|
1669 | + if ($mountpage[0]['mount_pid_ol']) { |
|
1670 | 1670 | $data['row']['uid'] = $mountpage[0]['mount_pid']; |
1671 | 1671 | } else { |
1672 | 1672 | // if the mount_pid_ol is not set the MP must not be used for the mountpoint page |
@@ -1676,7 +1676,7 @@ discard block |
||
1676 | 1676 | |
1677 | 1677 | $code .= $this->drawURLs_addRowsForPage( |
1678 | 1678 | $data['row'], |
1679 | - $data['HTML'] . \TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $data['row'], TRUE) |
|
1679 | + $data['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $data['row'], TRUE) |
|
1680 | 1680 | ); |
1681 | 1681 | } |
1682 | 1682 | |
@@ -1700,7 +1700,7 @@ discard block |
||
1700 | 1700 | if (!empty($excludeString)) { |
1701 | 1701 | /* @var $tree \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
1702 | 1702 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
1703 | - $tree->init('AND ' . $this->backendUser->getPagePermsClause(1)); |
|
1703 | + $tree->init('AND '.$this->backendUser->getPagePermsClause(1)); |
|
1704 | 1704 | |
1705 | 1705 | $excludeParts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $excludeString); |
1706 | 1706 | |
@@ -1709,7 +1709,7 @@ discard block |
||
1709 | 1709 | |
1710 | 1710 | // default is "page only" = "depth=0" |
1711 | 1711 | if (empty($depth)) { |
1712 | - $depth = ( stristr($excludePart,'+')) ? 99 : 0; |
|
1712 | + $depth = (stristr($excludePart, '+')) ? 99 : 0; |
|
1713 | 1713 | } |
1714 | 1714 | |
1715 | 1715 | $pidList[] = $pid; |
@@ -1742,7 +1742,7 @@ discard block |
||
1742 | 1742 | * @param string Page icon and title for row |
1743 | 1743 | * @return string HTML <tr> content (one or more) |
1744 | 1744 | */ |
1745 | - public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
|
1745 | + public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
|
1746 | 1746 | |
1747 | 1747 | $skipMessage = ''; |
1748 | 1748 | |
@@ -1763,7 +1763,7 @@ discard block |
||
1763 | 1763 | $cc = 0; |
1764 | 1764 | $content = ''; |
1765 | 1765 | if (count($configurations)) { |
1766 | - foreach($configurations as $confKey => $confArray) { |
|
1766 | + foreach ($configurations as $confKey => $confArray) { |
|
1767 | 1767 | |
1768 | 1768 | // Title column: |
1769 | 1769 | if (!$c) { |
@@ -1792,47 +1792,47 @@ discard block |
||
1792 | 1792 | $paramExpanded = ''; |
1793 | 1793 | $calcAccu = array(); |
1794 | 1794 | $calcRes = 1; |
1795 | - foreach($confArray['paramExpanded'] as $gVar => $gVal) { |
|
1796 | - $paramExpanded.= ' |
|
1795 | + foreach ($confArray['paramExpanded'] as $gVar => $gVal) { |
|
1796 | + $paramExpanded .= ' |
|
1797 | 1797 | <tr> |
1798 | 1798 | <td class="bgColor4-20">'.htmlspecialchars('&'.$gVar.'=').'<br/>'. |
1799 | 1799 | '('.count($gVal).')'. |
1800 | 1800 | '</td> |
1801 | - <td class="bgColor4" nowrap="nowrap">'.nl2br(htmlspecialchars(implode(chr(10),$gVal))).'</td> |
|
1801 | + <td class="bgColor4" nowrap="nowrap">'.nl2br(htmlspecialchars(implode(chr(10), $gVal))).'</td> |
|
1802 | 1802 | </tr> |
1803 | 1803 | '; |
1804 | - $calcRes*= count($gVal); |
|
1804 | + $calcRes *= count($gVal); |
|
1805 | 1805 | $calcAccu[] = count($gVal); |
1806 | 1806 | } |
1807 | 1807 | $paramExpanded = '<table class="lrPadding c-list param-expanded">'.$paramExpanded.'</table>'; |
1808 | - $paramExpanded.= 'Comb: '.implode('*',$calcAccu).'='.$calcRes; |
|
1808 | + $paramExpanded .= 'Comb: '.implode('*', $calcAccu).'='.$calcRes; |
|
1809 | 1809 | |
1810 | 1810 | // Options |
1811 | 1811 | $optionValues = ''; |
1812 | - if ($confArray['subCfg']['userGroups']) { |
|
1813 | - $optionValues.='User Groups: '.$confArray['subCfg']['userGroups'].'<br/>'; |
|
1812 | + if ($confArray['subCfg']['userGroups']) { |
|
1813 | + $optionValues .= 'User Groups: '.$confArray['subCfg']['userGroups'].'<br/>'; |
|
1814 | 1814 | } |
1815 | - if ($confArray['subCfg']['baseUrl']) { |
|
1816 | - $optionValues.='Base Url: '.$confArray['subCfg']['baseUrl'].'<br/>'; |
|
1815 | + if ($confArray['subCfg']['baseUrl']) { |
|
1816 | + $optionValues .= 'Base Url: '.$confArray['subCfg']['baseUrl'].'<br/>'; |
|
1817 | 1817 | } |
1818 | - if ($confArray['subCfg']['procInstrFilter']) { |
|
1819 | - $optionValues.='ProcInstr: '.$confArray['subCfg']['procInstrFilter'].'<br/>'; |
|
1818 | + if ($confArray['subCfg']['procInstrFilter']) { |
|
1819 | + $optionValues .= 'ProcInstr: '.$confArray['subCfg']['procInstrFilter'].'<br/>'; |
|
1820 | 1820 | } |
1821 | 1821 | |
1822 | 1822 | // Compile row: |
1823 | 1823 | $content .= ' |
1824 | - <tr class="bgColor' . ($c%2 ? '-20':'-10') . '"> |
|
1825 | - ' . $titleClm . ' |
|
1826 | - <td>' . htmlspecialchars($confKey) . '</td> |
|
1827 | - <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10) . '&', \TYPO3\CMS\Core\Utility\GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))) . '</td> |
|
1824 | + <tr class="bgColor' . ($c % 2 ? '-20' : '-10').'"> |
|
1825 | + ' . $titleClm.' |
|
1826 | + <td>' . htmlspecialchars($confKey).'</td> |
|
1827 | + <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10).'&', \TYPO3\CMS\Core\Utility\GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))).'</td> |
|
1828 | 1828 | <td>'.$paramExpanded.'</td> |
1829 | - <td nowrap="nowrap">' . $urlList . '</td> |
|
1830 | - <td nowrap="nowrap">' . $optionValues . '</td> |
|
1831 | - <td nowrap="nowrap">' . \TYPO3\CMS\Core\Utility\DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']) . '</td> |
|
1829 | + <td nowrap="nowrap">' . $urlList.'</td> |
|
1830 | + <td nowrap="nowrap">' . $optionValues.'</td> |
|
1831 | + <td nowrap="nowrap">' . \TYPO3\CMS\Core\Utility\DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']).'</td> |
|
1832 | 1832 | </tr>'; |
1833 | 1833 | } else { |
1834 | 1834 | |
1835 | - $content .= '<tr class="bgColor'.($c%2 ? '-20':'-10') . '"> |
|
1835 | + $content .= '<tr class="bgColor'.($c % 2 ? '-20' : '-10').'"> |
|
1836 | 1836 | '.$titleClm.' |
1837 | 1837 | <td>'.htmlspecialchars($confKey).'</td> |
1838 | 1838 | <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td> |
@@ -1847,7 +1847,7 @@ discard block |
||
1847 | 1847 | $message = !empty($skipMessage) ? ' ('.$skipMessage.')' : ''; |
1848 | 1848 | |
1849 | 1849 | // Compile row: |
1850 | - $content.= ' |
|
1850 | + $content .= ' |
|
1851 | 1851 | <tr class="bgColor-20" style="border-bottom: 1px solid black;"> |
1852 | 1852 | <td>'.$pageTitleAndIcon.'</td> |
1853 | 1853 | <td colspan="6"><em>No entries</em>'.$message.'</td> |
@@ -1925,7 +1925,7 @@ discard block |
||
1925 | 1925 | $releaseStatus = $this->CLI_releaseProcesses($this->CLI_buildProcessId()); |
1926 | 1926 | |
1927 | 1927 | $this->CLI_debug("Unprocessed Items remaining:".$this->getUnprocessedItemsCount()." (".$this->CLI_buildProcessId().")"); |
1928 | - $result |= ( $this->getUnprocessedItemsCount() > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED ); |
|
1928 | + $result |= ($this->getUnprocessedItemsCount() > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED); |
|
1929 | 1929 | } else { |
1930 | 1930 | $result |= self::CLI_STATUS_ABORTED; |
1931 | 1931 | } |
@@ -1938,7 +1938,7 @@ discard block |
||
1938 | 1938 | * |
1939 | 1939 | * @return void |
1940 | 1940 | */ |
1941 | - function CLI_main_im() { |
|
1941 | + function CLI_main_im() { |
|
1942 | 1942 | $this->setAccessMode('cli_im'); |
1943 | 1943 | |
1944 | 1944 | $cliObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_cli_im'); |
@@ -1948,7 +1948,7 @@ discard block |
||
1948 | 1948 | $this->backendUser->setWorkspace(0); |
1949 | 1949 | |
1950 | 1950 | // Print help |
1951 | - if (!isset($cliObj->cli_args['_DEFAULT'][1])) { |
|
1951 | + if (!isset($cliObj->cli_args['_DEFAULT'][1])) { |
|
1952 | 1952 | $cliObj->cli_validateArgs(); |
1953 | 1953 | $cliObj->cli_help(); |
1954 | 1954 | exit; |
@@ -1956,8 +1956,8 @@ discard block |
||
1956 | 1956 | |
1957 | 1957 | $cliObj->cli_validateArgs(); |
1958 | 1958 | |
1959 | - if ($cliObj->cli_argValue('-o')==='exec') { |
|
1960 | - $this->registerQueueEntriesInternallyOnly=TRUE; |
|
1959 | + if ($cliObj->cli_argValue('-o') === 'exec') { |
|
1960 | + $this->registerQueueEntriesInternallyOnly = TRUE; |
|
1961 | 1961 | } |
1962 | 1962 | |
1963 | 1963 | if (isset($cliObj->cli_args['_DEFAULT'][2])) { |
@@ -1970,16 +1970,16 @@ discard block |
||
1970 | 1970 | |
1971 | 1971 | $configurationKeys = $this->getConfigurationKeys($cliObj); |
1972 | 1972 | |
1973 | - if(!is_array($configurationKeys)){ |
|
1973 | + if (!is_array($configurationKeys)) { |
|
1974 | 1974 | $configurations = $this->getUrlsForPageId($pageId); |
1975 | - if(is_array($configurations)){ |
|
1975 | + if (is_array($configurations)) { |
|
1976 | 1976 | $configurationKeys = array_keys($configurations); |
1977 | - }else{ |
|
1977 | + } else { |
|
1978 | 1978 | $configurationKeys = array(); |
1979 | 1979 | } |
1980 | 1980 | } |
1981 | 1981 | |
1982 | - if($cliObj->cli_argValue('-o')==='queue' || $cliObj->cli_argValue('-o')==='exec'){ |
|
1982 | + if ($cliObj->cli_argValue('-o') === 'queue' || $cliObj->cli_argValue('-o') === 'exec') { |
|
1983 | 1983 | |
1984 | 1984 | $reason = new tx_crawler_domain_reason(); |
1985 | 1985 | $reason->setReason(tx_crawler_domain_reason::REASON_GUI_SUBMIT); |
@@ -1987,7 +1987,7 @@ discard block |
||
1987 | 1987 | tx_crawler_domain_events_dispatcher::getInstance()->post( |
1988 | 1988 | 'invokeQueueChange', |
1989 | 1989 | $this->setID, |
1990 | - array( 'reason' => $reason ) |
|
1990 | + array('reason' => $reason) |
|
1991 | 1991 | ); |
1992 | 1992 | } |
1993 | 1993 | |
@@ -1998,42 +1998,42 @@ discard block |
||
1998 | 1998 | $this->setID = \TYPO3\CMS\Core\Utility\GeneralUtility::md5int(microtime()); |
1999 | 1999 | $this->getPageTreeAndUrls( |
2000 | 2000 | $pageId, |
2001 | - \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_argValue('-d'),0,99), |
|
2001 | + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_argValue('-d'), 0, 99), |
|
2002 | 2002 | $this->getCurrentTime(), |
2003 | - \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30,1,1000), |
|
2004 | - $cliObj->cli_argValue('-o')==='queue' || $cliObj->cli_argValue('-o')==='exec', |
|
2005 | - $cliObj->cli_argValue('-o')==='url', |
|
2006 | - \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$cliObj->cli_argValue('-proc'),1), |
|
2003 | + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30, 1, 1000), |
|
2004 | + $cliObj->cli_argValue('-o') === 'queue' || $cliObj->cli_argValue('-o') === 'exec', |
|
2005 | + $cliObj->cli_argValue('-o') === 'url', |
|
2006 | + \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $cliObj->cli_argValue('-proc'), 1), |
|
2007 | 2007 | $configurationKeys |
2008 | 2008 | ); |
2009 | 2009 | |
2010 | - if ($cliObj->cli_argValue('-o')==='url') { |
|
2011 | - $cliObj->cli_echo(implode(chr(10),$this->downloadUrls).chr(10),1); |
|
2012 | - } elseif ($cliObj->cli_argValue('-o')==='exec') { |
|
2010 | + if ($cliObj->cli_argValue('-o') === 'url') { |
|
2011 | + $cliObj->cli_echo(implode(chr(10), $this->downloadUrls).chr(10), 1); |
|
2012 | + } elseif ($cliObj->cli_argValue('-o') === 'exec') { |
|
2013 | 2013 | $cliObj->cli_echo("Executing ".count($this->urlList)." requests right away:\n\n"); |
2014 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10)); |
|
2014 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10)); |
|
2015 | 2015 | $cliObj->cli_echo("\nProcessing:\n"); |
2016 | 2016 | |
2017 | - foreach($this->queueEntries as $queueRec) { |
|
2017 | + foreach ($this->queueEntries as $queueRec) { |
|
2018 | 2018 | $p = unserialize($queueRec['parameters']); |
2019 | - $cliObj->cli_echo($p['url'].' ('.implode(',',$p['procInstructions']).') => '); |
|
2019 | + $cliObj->cli_echo($p['url'].' ('.implode(',', $p['procInstructions']).') => '); |
|
2020 | 2020 | |
2021 | 2021 | $result = $this->readUrlFromArray($queueRec); |
2022 | 2022 | |
2023 | 2023 | $requestResult = unserialize($result['content']); |
2024 | - if (is_array($requestResult)) { |
|
2025 | - $resLog = is_array($requestResult['log']) ? chr(10).chr(9).chr(9).implode(chr(10).chr(9).chr(9),$requestResult['log']) : ''; |
|
2024 | + if (is_array($requestResult)) { |
|
2025 | + $resLog = is_array($requestResult['log']) ? chr(10).chr(9).chr(9).implode(chr(10).chr(9).chr(9), $requestResult['log']) : ''; |
|
2026 | 2026 | $cliObj->cli_echo('OK: '.$resLog.chr(10)); |
2027 | 2027 | } else { |
2028 | - $cliObj->cli_echo('Error checking Crawler Result: '.substr(preg_replace('/\s+/',' ',strip_tags($result['content'])),0,30000).'...'.chr(10)); |
|
2028 | + $cliObj->cli_echo('Error checking Crawler Result: '.substr(preg_replace('/\s+/', ' ', strip_tags($result['content'])), 0, 30000).'...'.chr(10)); |
|
2029 | 2029 | } |
2030 | 2030 | } |
2031 | - } elseif ($cliObj->cli_argValue('-o')==='queue') { |
|
2031 | + } elseif ($cliObj->cli_argValue('-o') === 'queue') { |
|
2032 | 2032 | $cliObj->cli_echo("Putting ".count($this->urlList)." entries in queue:\n\n"); |
2033 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10)); |
|
2033 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10)); |
|
2034 | 2034 | } else { |
2035 | - $cliObj->cli_echo(count($this->urlList)." entries found for processing. (Use -o to decide action):\n\n",1); |
|
2036 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10),1); |
|
2035 | + $cliObj->cli_echo(count($this->urlList)." entries found for processing. (Use -o to decide action):\n\n", 1); |
|
2036 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10), 1); |
|
2037 | 2037 | } |
2038 | 2038 | } |
2039 | 2039 | |
@@ -2058,12 +2058,12 @@ discard block |
||
2058 | 2058 | } |
2059 | 2059 | |
2060 | 2060 | $cliObj->cli_validateArgs(); |
2061 | - $pageId = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1],0); |
|
2061 | + $pageId = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1], 0); |
|
2062 | 2062 | $fullFlush = ($pageId == 0); |
2063 | 2063 | |
2064 | 2064 | $mode = $cliObj->cli_argValue('-o'); |
2065 | 2065 | |
2066 | - switch($mode) { |
|
2066 | + switch ($mode) { |
|
2067 | 2067 | case 'all': |
2068 | 2068 | $result = $this->getLogEntriesForPageId($pageId, '', true, $fullFlush); |
2069 | 2069 | break; |
@@ -2086,7 +2086,7 @@ discard block |
||
2086 | 2086 | * @param tx_crawler_cli_im $cliObj Command line object |
2087 | 2087 | * @return mixed Array of keys or null if no keys found |
2088 | 2088 | */ |
2089 | - protected function getConfigurationKeys(tx_crawler_cli_im &$cliObj) { |
|
2089 | + protected function getConfigurationKeys(tx_crawler_cli_im & $cliObj) { |
|
2090 | 2090 | $parameter = trim($cliObj->cli_argValue('-conf')); |
2091 | 2091 | return ($parameter != '' ? \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $parameter) : array()); |
2092 | 2092 | } |
@@ -2111,7 +2111,7 @@ discard block |
||
2111 | 2111 | $purgeDate = $this->getCurrentTime() - 24 * 60 * 60 * intval($this->extensionSettings['purgeQueueDays']); |
2112 | 2112 | $del = $this->db->exec_DELETEquery( |
2113 | 2113 | 'tx_crawler_queue', |
2114 | - 'exec_time!=0 AND exec_time<' . $purgeDate |
|
2114 | + 'exec_time!=0 AND exec_time<'.$purgeDate |
|
2115 | 2115 | ); |
2116 | 2116 | } |
2117 | 2117 | |
@@ -2128,10 +2128,10 @@ discard block |
||
2128 | 2128 | intval($countInARun) |
2129 | 2129 | ); |
2130 | 2130 | |
2131 | - if (count($rows)>0) { |
|
2131 | + if (count($rows) > 0) { |
|
2132 | 2132 | $quidList = array(); |
2133 | 2133 | |
2134 | - foreach($rows as $r) { |
|
2134 | + foreach ($rows as $r) { |
|
2135 | 2135 | $quidList[] = $r['qid']; |
2136 | 2136 | } |
2137 | 2137 | |
@@ -2142,7 +2142,7 @@ discard block |
||
2142 | 2142 | //TODO make sure we're not taking assigned queue-entires |
2143 | 2143 | $this->db->exec_UPDATEquery( |
2144 | 2144 | 'tx_crawler_queue', |
2145 | - 'qid IN ('.implode(',',$quidList).')', |
|
2145 | + 'qid IN ('.implode(',', $quidList).')', |
|
2146 | 2146 | array( |
2147 | 2147 | 'process_scheduled' => intval($this->getCurrentTime()), |
2148 | 2148 | 'process_id' => $processId |
@@ -2153,32 +2153,32 @@ discard block |
||
2153 | 2153 | $numberOfAffectedRows = $this->db->sql_affected_rows(); |
2154 | 2154 | $this->db->exec_UPDATEquery( |
2155 | 2155 | 'tx_crawler_process', |
2156 | - "process_id = '".$processId."'" , |
|
2156 | + "process_id = '".$processId."'", |
|
2157 | 2157 | array( |
2158 | 2158 | 'assigned_items_count' => intval($numberOfAffectedRows) |
2159 | 2159 | ) |
2160 | 2160 | ); |
2161 | 2161 | |
2162 | - if($numberOfAffectedRows == count($quidList)) { |
|
2162 | + if ($numberOfAffectedRows == count($quidList)) { |
|
2163 | 2163 | $this->db->sql_query('COMMIT'); |
2164 | - } else { |
|
2164 | + } else { |
|
2165 | 2165 | $this->db->sql_query('ROLLBACK'); |
2166 | 2166 | $this->CLI_debug("Nothing processed due to multi-process collision (".$this->CLI_buildProcessId().")"); |
2167 | - return ( $result | self::CLI_STATUS_ABORTED ); |
|
2167 | + return ($result | self::CLI_STATUS_ABORTED); |
|
2168 | 2168 | } |
2169 | 2169 | |
2170 | 2170 | |
2171 | 2171 | |
2172 | - foreach($rows as $r) { |
|
2172 | + foreach ($rows as $r) { |
|
2173 | 2173 | $result |= $this->readUrl($r['qid']); |
2174 | 2174 | |
2175 | 2175 | $counter++; |
2176 | - usleep(intval($sleepTime)); // Just to relax the system |
|
2176 | + usleep(intval($sleepTime)); // Just to relax the system |
|
2177 | 2177 | |
2178 | 2178 | // if during the start and the current read url the cli has been disable we need to return from the function |
2179 | 2179 | // mark the process NOT as ended. |
2180 | 2180 | if ($this->getDisabled()) { |
2181 | - return ( $result | self::CLI_STATUS_ABORTED ); |
|
2181 | + return ($result | self::CLI_STATUS_ABORTED); |
|
2182 | 2182 | } |
2183 | 2183 | |
2184 | 2184 | if (!$this->CLI_checkIfProcessIsActive($this->CLI_buildProcessId())) { |
@@ -2186,7 +2186,7 @@ discard block |
||
2186 | 2186 | |
2187 | 2187 | //TODO might need an additional returncode |
2188 | 2188 | $result |= self::CLI_STATUS_ABORTED; |
2189 | - break; //possible timeout |
|
2189 | + break; //possible timeout |
|
2190 | 2190 | } |
2191 | 2191 | } |
2192 | 2192 | |
@@ -2199,7 +2199,7 @@ discard block |
||
2199 | 2199 | $this->CLI_debug("Nothing within queue which needs to be processed (".$this->CLI_buildProcessId().")"); |
2200 | 2200 | } |
2201 | 2201 | |
2202 | - if($counter > 0) { |
|
2202 | + if ($counter > 0) { |
|
2203 | 2203 | $result |= self::CLI_STATUS_PROCESSED; |
2204 | 2204 | } |
2205 | 2205 | |
@@ -2211,12 +2211,12 @@ discard block |
||
2211 | 2211 | * |
2212 | 2212 | * @return void |
2213 | 2213 | */ |
2214 | - function CLI_runHooks() { |
|
2214 | + function CLI_runHooks() { |
|
2215 | 2215 | global $TYPO3_CONF_VARS; |
2216 | - if (is_array($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'])) { |
|
2217 | - foreach($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'] as $objRef) { |
|
2216 | + if (is_array($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'])) { |
|
2217 | + foreach ($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'] as $objRef) { |
|
2218 | 2218 | $hookObj = &\TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($objRef); |
2219 | - if (is_object($hookObj)) { |
|
2219 | + if (is_object($hookObj)) { |
|
2220 | 2220 | $hookObj->crawler_init($this); |
2221 | 2221 | } |
2222 | 2222 | } |
@@ -2253,7 +2253,7 @@ discard block |
||
2253 | 2253 | |
2254 | 2254 | $currentTime = $this->getCurrentTime(); |
2255 | 2255 | |
2256 | - while($row = $this->db->sql_fetch_assoc($res)) { |
|
2256 | + while ($row = $this->db->sql_fetch_assoc($res)) { |
|
2257 | 2257 | if ($row['ttl'] < $currentTime) { |
2258 | 2258 | $orphanProcesses[] = $row['process_id']; |
2259 | 2259 | } else { |
@@ -2263,7 +2263,7 @@ discard block |
||
2263 | 2263 | |
2264 | 2264 | // if there are less than allowed active processes then add a new one |
2265 | 2265 | if ($processCount < intval($this->extensionSettings['processLimit'])) { |
2266 | - $this->CLI_debug("add ".$this->CLI_buildProcessId()." (".($processCount+1)."/".intval($this->extensionSettings['processLimit']).")"); |
|
2266 | + $this->CLI_debug("add ".$this->CLI_buildProcessId()." (".($processCount + 1)."/".intval($this->extensionSettings['processLimit']).")"); |
|
2267 | 2267 | |
2268 | 2268 | // create new process record |
2269 | 2269 | $this->db->exec_INSERTquery( |
@@ -2296,17 +2296,17 @@ discard block |
||
2296 | 2296 | * @param boolean $withinLock show whether the DB-actions are included within an existing lock |
2297 | 2297 | * @return boolean |
2298 | 2298 | */ |
2299 | - function CLI_releaseProcesses($releaseIds, $withinLock=false) { |
|
2299 | + function CLI_releaseProcesses($releaseIds, $withinLock = false) { |
|
2300 | 2300 | |
2301 | 2301 | if (!is_array($releaseIds)) { |
2302 | 2302 | $releaseIds = array($releaseIds); |
2303 | 2303 | } |
2304 | 2304 | |
2305 | 2305 | if (!count($releaseIds) > 0) { |
2306 | - return false; //nothing to release |
|
2306 | + return false; //nothing to release |
|
2307 | 2307 | } |
2308 | 2308 | |
2309 | - if(!$withinLock) $this->db->sql_query('BEGIN'); |
|
2309 | + if (!$withinLock) $this->db->sql_query('BEGIN'); |
|
2310 | 2310 | |
2311 | 2311 | // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup |
2312 | 2312 | // this ensures that a single process can't mess up the entire process table |
@@ -2336,21 +2336,21 @@ discard block |
||
2336 | 2336 | // mark all requested processes as non-active |
2337 | 2337 | $this->db->exec_UPDATEquery( |
2338 | 2338 | 'tx_crawler_process', |
2339 | - 'process_id IN (\''.implode('\',\'',$releaseIds).'\') AND deleted=0', |
|
2339 | + 'process_id IN (\''.implode('\',\'', $releaseIds).'\') AND deleted=0', |
|
2340 | 2340 | array( |
2341 | 2341 | 'active'=>'0' |
2342 | 2342 | ) |
2343 | 2343 | ); |
2344 | 2344 | $this->db->exec_UPDATEquery( |
2345 | 2345 | 'tx_crawler_queue', |
2346 | - 'exec_time=0 AND process_id IN ("'.implode('","',$releaseIds).'")', |
|
2346 | + 'exec_time=0 AND process_id IN ("'.implode('","', $releaseIds).'")', |
|
2347 | 2347 | array( |
2348 | 2348 | 'process_scheduled'=>0, |
2349 | 2349 | 'process_id'=>'' |
2350 | 2350 | ) |
2351 | 2351 | ); |
2352 | 2352 | |
2353 | - if(!$withinLock) $this->db->sql_query('COMMIT'); |
|
2353 | + if (!$withinLock) $this->db->sql_query('COMMIT'); |
|
2354 | 2354 | |
2355 | 2355 | return true; |
2356 | 2356 | } |
@@ -2378,13 +2378,13 @@ discard block |
||
2378 | 2378 | $this->db->sql_query('BEGIN'); |
2379 | 2379 | $res = $this->db->exec_SELECTquery( |
2380 | 2380 | 'process_id,active,ttl', |
2381 | - 'tx_crawler_process','process_id = \''.$pid.'\' AND deleted=0', |
|
2381 | + 'tx_crawler_process', 'process_id = \''.$pid.'\' AND deleted=0', |
|
2382 | 2382 | '', |
2383 | 2383 | 'ttl', |
2384 | 2384 | '0,1' |
2385 | 2385 | ); |
2386 | - if($row = $this->db->sql_fetch_assoc($res)) { |
|
2387 | - $ret = intVal($row['active'])==1; |
|
2386 | + if ($row = $this->db->sql_fetch_assoc($res)) { |
|
2387 | + $ret = intVal($row['active']) == 1; |
|
2388 | 2388 | } |
2389 | 2389 | $this->db->sql_query('COMMIT'); |
2390 | 2390 | |
@@ -2397,8 +2397,8 @@ discard block |
||
2397 | 2397 | * @return string the ID |
2398 | 2398 | */ |
2399 | 2399 | function CLI_buildProcessId() { |
2400 | - if(!$this->processID) { |
|
2401 | - $this->processID= \TYPO3\CMS\Core\Utility\GeneralUtility::shortMD5($this->microtime(true)); |
|
2400 | + if (!$this->processID) { |
|
2401 | + $this->processID = \TYPO3\CMS\Core\Utility\GeneralUtility::shortMD5($this->microtime(true)); |
|
2402 | 2402 | } |
2403 | 2403 | return $this->processID; |
2404 | 2404 | } |
@@ -2408,7 +2408,7 @@ discard block |
||
2408 | 2408 | * |
2409 | 2409 | * @return mixed |
2410 | 2410 | */ |
2411 | - protected function microtime($get_as_float = false ) |
|
2411 | + protected function microtime($get_as_float = false) |
|
2412 | 2412 | { |
2413 | 2413 | return microtime($get_as_float); |
2414 | 2414 | } |
@@ -2419,7 +2419,7 @@ discard block |
||
2419 | 2419 | * @param string $msg the message |
2420 | 2420 | */ |
2421 | 2421 | function CLI_debug($msg) { |
2422 | - if(intval($this->extensionSettings['processDebug'])) { |
|
2422 | + if (intval($this->extensionSettings['processDebug'])) { |
|
2423 | 2423 | echo $msg."\n"; flush(); |
2424 | 2424 | } |
2425 | 2425 | } |
@@ -2438,7 +2438,7 @@ discard block |
||
2438 | 2438 | |
2439 | 2439 | $cmd = escapeshellcmd($this->extensionSettings['phpPath']); |
2440 | 2440 | $cmd .= ' '; |
2441 | - $cmd .= escapeshellarg(\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler') . 'cli/bootstrap.php'); |
|
2441 | + $cmd .= escapeshellarg(\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler').'cli/bootstrap.php'); |
|
2442 | 2442 | $cmd .= ' '; |
2443 | 2443 | $cmd .= escapeshellarg($this->getFrontendBasePath()); |
2444 | 2444 | $cmd .= ' '; |
@@ -2448,10 +2448,10 @@ discard block |
||
2448 | 2448 | |
2449 | 2449 | $startTime = microtime(true); |
2450 | 2450 | $content = $this->executeShellCommand($cmd); |
2451 | - $this->log($url . (microtime(true) - $startTime)); |
|
2451 | + $this->log($url.(microtime(true) - $startTime)); |
|
2452 | 2452 | |
2453 | 2453 | $result = array( |
2454 | - 'request' => implode("\r\n", $requestHeaders) . "\r\n\r\n", |
|
2454 | + 'request' => implode("\r\n", $requestHeaders)."\r\n\r\n", |
|
2455 | 2455 | 'headers' => '', |
2456 | 2456 | 'content' => $content |
2457 | 2457 | ); |
@@ -2471,7 +2471,7 @@ discard block |
||
2471 | 2471 | $scheduledAgeInSeconds = $this->extensionSettings['cleanUpScheduledAge'] * 86400; |
2472 | 2472 | |
2473 | 2473 | $now = time(); |
2474 | - $condition = '(exec_time<>0 AND exec_time<' . ($now - $processedAgeInSeconds) . ') OR scheduled<=' . ($now - $scheduledAgeInSeconds); |
|
2474 | + $condition = '(exec_time<>0 AND exec_time<'.($now - $processedAgeInSeconds).') OR scheduled<='.($now - $scheduledAgeInSeconds); |
|
2475 | 2475 | $this->flushQueue($condition); |
2476 | 2476 | } |
2477 | 2477 | |
@@ -2490,7 +2490,7 @@ discard block |
||
2490 | 2490 | $GLOBALS['TT']->start(); |
2491 | 2491 | } |
2492 | 2492 | |
2493 | - $GLOBALS['TSFE'] = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Controller\\TypoScriptFrontendController', $GLOBALS['TYPO3_CONF_VARS'], $id, $typeNum); |
|
2493 | + $GLOBALS['TSFE'] = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Controller\\TypoScriptFrontendController', $GLOBALS['TYPO3_CONF_VARS'], $id, $typeNum); |
|
2494 | 2494 | $GLOBALS['TSFE']->sys_page = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Page\\PageRepository'); |
2495 | 2495 | $GLOBALS['TSFE']->sys_page->init(TRUE); |
2496 | 2496 | $GLOBALS['TSFE']->connectToDB(); |
@@ -2503,6 +2503,6 @@ discard block |
||
2503 | 2503 | } |
2504 | 2504 | } |
2505 | 2505 | |
2506 | -if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']) { |
|
2506 | +if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']) { |
|
2507 | 2507 | include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']); |
2508 | 2508 | } |
@@ -1,5 +1,7 @@ |
||
1 | 1 | <?php |
2 | -if (!defined('TYPO3_REQUESTTYPE')) die('You cannot run this script directly!'); |
|
2 | +if (!defined('TYPO3_REQUESTTYPE')) { |
|
3 | + die('You cannot run this script directly!'); |
|
4 | +} |
|
3 | 5 | |
4 | 6 | $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
5 | 7 | $crawlerObj->CLI_main_flush($_SERVER["argv"]); |
@@ -1,5 +1,7 @@ |
||
1 | 1 | <?php |
2 | -if (!defined('TYPO3_REQUESTTYPE')) die('You cannot run this script directly!'); |
|
2 | +if (!defined('TYPO3_REQUESTTYPE')) { |
|
3 | + die('You cannot run this script directly!'); |
|
4 | +} |
|
3 | 5 | |
4 | 6 | $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
5 | 7 | $crawlerObj->CLI_main_im($_SERVER["argv"]); |
@@ -1,5 +1,7 @@ |
||
1 | 1 | <?php |
2 | -if (!defined('TYPO3_REQUESTTYPE')) die('You cannot run this script directly!'); |
|
2 | +if (!defined('TYPO3_REQUESTTYPE')) { |
|
3 | + die('You cannot run this script directly!'); |
|
4 | +} |
|
3 | 5 | |
4 | 6 | $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
5 | 7 |
@@ -60,7 +60,7 @@ discard block |
||
60 | 60 | $additionalParameters = array(); |
61 | 61 | $additionalParameters[] = 'SET[function]=tx_crawler_modfunc1'; |
62 | 62 | $additionalParameters[] = 'SET[crawlaction]=start'; |
63 | - $additionalParameters[] = 'configurationSelection[]=' . $crawlerConfiguration['name']; |
|
63 | + $additionalParameters[] = 'configurationSelection[]='.$crawlerConfiguration['name']; |
|
64 | 64 | |
65 | 65 | $additionalMenuItems = array(); |
66 | 66 | $additionalMenuItems[] = $backRef->linkItem( |
@@ -69,7 +69,7 @@ discard block |
||
69 | 69 | 'crawler' |
70 | 70 | ), |
71 | 71 | $this->getContextMenuIcon(), |
72 | - 'top.goToModule(\'web_info\', 1, \'&' . implode('&', $additionalParameters) . '\'); return hideCM();' |
|
72 | + 'top.goToModule(\'web_info\', 1, \'&'.implode('&', $additionalParameters).'\'); return hideCM();' |
|
73 | 73 | ); |
74 | 74 | |
75 | 75 | return array_merge($menuItems, $additionalMenuItems); |
@@ -84,7 +84,7 @@ discard block |
||
84 | 84 | { |
85 | 85 | $icon = sprintf( |
86 | 86 | '<img src="%s" border="0" align="top" alt="" />', |
87 | - ExtensionManagementUtility::siteRelPath('crawler') . 'icon_tx_crawler_configuration.gif' |
|
87 | + ExtensionManagementUtility::siteRelPath('crawler').'icon_tx_crawler_configuration.gif' |
|
88 | 88 | ); |
89 | 89 | |
90 | 90 | return $icon; |
@@ -39,7 +39,7 @@ discard block |
||
39 | 39 | { |
40 | 40 | if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'])) { |
41 | 41 | foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['procInstructions'] as $key => $value) { |
42 | - $configuration['items'][] = array($value . ' [' . $key . ']', $key, $this->getExtensionIcon($key)); |
|
42 | + $configuration['items'][] = array($value.' ['.$key.']', $key, $this->getExtensionIcon($key)); |
|
43 | 43 | } |
44 | 44 | } |
45 | 45 | |
@@ -59,8 +59,8 @@ discard block |
||
59 | 59 | if (method_exists('TYPO3\\CMS\\Core\\Utility\\ExtensionManagementUtility', 'getExtensionKeyByPrefix')) { |
60 | 60 | $parts = explode('_', $key); |
61 | 61 | if (is_array($parts) && count($parts) > 2) { |
62 | - $extensionKey = ExtensionManagementUtility::getExtensionKeyByPrefix('tx_' . $parts[1]); |
|
63 | - $extIcon = ExtensionManagementUtility::siteRelPath($extensionKey) . 'ext_icon.gif'; |
|
62 | + $extensionKey = ExtensionManagementUtility::getExtensionKeyByPrefix('tx_'.$parts[1]); |
|
63 | + $extIcon = ExtensionManagementUtility::siteRelPath($extensionKey).'ext_icon.gif'; |
|
64 | 64 | } |
65 | 65 | } |
66 | 66 |
@@ -26,7 +26,7 @@ discard block |
||
26 | 26 | * Manages crawler processes and can be used to start a new process or multiple processes |
27 | 27 | * |
28 | 28 | */ |
29 | -class tx_crawler_domain_process_manager { |
|
29 | +class tx_crawler_domain_process_manager { |
|
30 | 30 | |
31 | 31 | /** |
32 | 32 | * @var $timeToLive integer |
@@ -83,21 +83,21 @@ discard block |
||
83 | 83 | * |
84 | 84 | * @throws RuntimeException |
85 | 85 | */ |
86 | - public function multiProcess( $timeout ) { |
|
86 | + public function multiProcess($timeout) { |
|
87 | 87 | |
88 | 88 | if ($this->processLimit <= 1) { |
89 | - throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL); |
|
89 | + throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.'.PHP_EOL); |
|
90 | 90 | } |
91 | 91 | |
92 | 92 | $pendingItemsStart = $this->queueRepository->countAllPendingItems(); |
93 | 93 | $itemReportLimit = 20; |
94 | - $reportItemCount = $pendingItemsStart - $itemReportLimit; |
|
94 | + $reportItemCount = $pendingItemsStart - $itemReportLimit; |
|
95 | 95 | if ($this->verbose) { |
96 | 96 | $this->reportItemStatus(); |
97 | 97 | } |
98 | 98 | $this->startRequiredProcesses(); |
99 | 99 | $nextTimeOut = time() + $this->timeToLive; |
100 | - for ($i=0; $i<$timeout; $i++) { |
|
100 | + for ($i = 0; $i < $timeout; $i++) { |
|
101 | 101 | $currentPendingItems = $this->queueRepository->countAllPendingItems(); |
102 | 102 | if ($this->startRequiredProcesses($this->verbose)) { |
103 | 103 | $nextTimeOut = time() + $this->timeToLive; |
@@ -112,16 +112,16 @@ discard block |
||
112 | 112 | if ($this->verbose) { |
113 | 113 | $this->reportItemStatus(); |
114 | 114 | } |
115 | - $reportItemCount = $currentPendingItems - $itemReportLimit; |
|
115 | + $reportItemCount = $currentPendingItems - $itemReportLimit; |
|
116 | 116 | } |
117 | 117 | sleep(1); |
118 | 118 | if ($nextTimeOut < time()) { |
119 | - $timedOutProcesses = $this->processRepository->findAll('','DESC',NULL,0,'ttl >'.$nextTimeOut); |
|
119 | + $timedOutProcesses = $this->processRepository->findAll('', 'DESC', NULL, 0, 'ttl >'.$nextTimeOut); |
|
120 | 120 | $nextTimeOut = time() + $this->timeToLive; |
121 | 121 | if ($this->verbose) { |
122 | - echo 'Cleanup'.implode(',',$timedOutProcesses->getProcessIds()).chr(10); |
|
122 | + echo 'Cleanup'.implode(',', $timedOutProcesses->getProcessIds()).chr(10); |
|
123 | 123 | } |
124 | - $this->crawlerObj->CLI_releaseProcesses($timedOutProcesses->getProcessIds(),true); |
|
124 | + $this->crawlerObj->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true); |
|
125 | 125 | } |
126 | 126 | } |
127 | 127 | if ($currentPendingItems > 0 && $this->verbose) { |
@@ -143,17 +143,17 @@ discard block |
||
143 | 143 | */ |
144 | 144 | private function startRequiredProcesses() { |
145 | 145 | $ret = FALSE; |
146 | - $currentProcesses= $this->processRepository->countActive(); |
|
147 | - $availableProcessesCount = $this->processLimit-$currentProcesses; |
|
146 | + $currentProcesses = $this->processRepository->countActive(); |
|
147 | + $availableProcessesCount = $this->processLimit - $currentProcesses; |
|
148 | 148 | $requiredProcessesCount = ceil($this->queueRepository->countAllUnassignedPendingItems() / $this->countInARun); |
149 | - $startProcessCount = min(array($availableProcessesCount,$requiredProcessesCount)); |
|
149 | + $startProcessCount = min(array($availableProcessesCount, $requiredProcessesCount)); |
|
150 | 150 | if ($startProcessCount <= 0) { |
151 | 151 | return $ret; |
152 | 152 | } |
153 | 153 | if ($startProcessCount && $this->verbose) { |
154 | 154 | echo 'Start '.$startProcessCount.' new processes (Running:'.$currentProcesses.')'; |
155 | 155 | } |
156 | - for($i=0;$i<$startProcessCount;$i++) { |
|
156 | + for ($i = 0; $i < $startProcessCount; $i++) { |
|
157 | 157 | usleep(100); |
158 | 158 | if ($this->startProcess()) { |
159 | 159 | if ($this->verbose) { |
@@ -173,14 +173,14 @@ discard block |
||
173 | 173 | * @throws Exception if no crawlerprocess was started |
174 | 174 | */ |
175 | 175 | public function startProcess() { |
176 | - $ttl = (time() + $this->timeToLive -1); |
|
176 | + $ttl = (time() + $this->timeToLive - 1); |
|
177 | 177 | $current = $this->processRepository->countNotTimeouted($ttl); |
178 | - $completePath = '(' .escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null'; |
|
178 | + $completePath = '('.escapeshellcmd($this->getCrawlerCliPath()).' &) > /dev/null'; |
|
179 | 179 | if (system($completePath) === FALSE) { |
180 | 180 | throw new Exception('could not start process!'); |
181 | 181 | } |
182 | 182 | else { |
183 | - for ($i=0;$i<10;$i++) { |
|
183 | + for ($i = 0; $i < 10; $i++) { |
|
184 | 184 | if ($this->processRepository->countNotTimeouted($ttl) > $current) { |
185 | 185 | return true; |
186 | 186 | } |
@@ -195,10 +195,10 @@ discard block |
||
195 | 195 | * |
196 | 196 | * @return string |
197 | 197 | */ |
198 | - public function getCrawlerCliPath(){ |
|
199 | - $phpPath = $this->crawlerObj->extensionSettings['phpPath'] . ' '; |
|
198 | + public function getCrawlerCliPath() { |
|
199 | + $phpPath = $this->crawlerObj->extensionSettings['phpPath'].' '; |
|
200 | 200 | $pathToTypo3 = rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/'); |
201 | - $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
201 | + $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
202 | 202 | $cliPart = '/typo3/cli_dispatch.phpsh crawler'; |
203 | 203 | return $phpPath.$pathToTypo3.$cliPart; |
204 | 204 | } |
@@ -178,8 +178,7 @@ |
||
178 | 178 | $completePath = '(' .escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null'; |
179 | 179 | if (system($completePath) === FALSE) { |
180 | 180 | throw new Exception('could not start process!'); |
181 | - } |
|
182 | - else { |
|
181 | + } else { |
|
183 | 182 | for ($i=0;$i<10;$i++) { |
184 | 183 | if ($this->processRepository->countNotTimeouted($ttl) > $current) { |
185 | 184 | return true; |