@@ -5,20 +5,20 @@ discard block |
||
5 | 5 | * Retrieve path (taken from cli_dispatch.phpsh) |
6 | 6 | */ |
7 | 7 | |
8 | - // Get path to this script |
|
8 | + // Get path to this script |
|
9 | 9 | $tempPathThisScript = isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : (isset($_ENV['_']) ? $_ENV['_'] : $_SERVER['_']); |
10 | 10 | |
11 | - // Resolve path |
|
11 | + // Resolve path |
|
12 | 12 | if (!isAbsPath($tempPathThisScript)) { |
13 | - $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
|
14 | - if ($workingDirectory) { |
|
15 | - $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
16 | - if (!@is_file($tempPathThisScript)) { |
|
17 | - die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
18 | - } |
|
19 | - } else { |
|
20 | - die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
21 | - } |
|
13 | + $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
|
14 | + if ($workingDirectory) { |
|
15 | + $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
16 | + if (!@is_file($tempPathThisScript)) { |
|
17 | + die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
18 | + } |
|
19 | + } else { |
|
20 | + die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
21 | + } |
|
22 | 22 | } |
23 | 23 | |
24 | 24 | $typo3Root = preg_replace('#typo3conf/ext/crawler/cli/bootstrap.php$#', '', $tempPathThisScript); |
@@ -30,33 +30,33 @@ discard block |
||
30 | 30 | */ |
31 | 31 | $additionalHeaders = unserialize(base64_decode($_SERVER['argv'][3])); |
32 | 32 | if (is_array($additionalHeaders)) { |
33 | - foreach ($additionalHeaders as $additionalHeader) { |
|
34 | - if (strpos($additionalHeader, ':') !== FALSE) { |
|
35 | - list($key, $value) = explode(':', $additionalHeader, 2); |
|
36 | - $key = str_replace('-', '_', strtoupper(trim($key))); |
|
37 | - if ($key != 'HOST') { |
|
38 | - $_SERVER['HTTP_' . $key] = $value; |
|
39 | - } |
|
40 | - } |
|
41 | - } |
|
33 | + foreach ($additionalHeaders as $additionalHeader) { |
|
34 | + if (strpos($additionalHeader, ':') !== FALSE) { |
|
35 | + list($key, $value) = explode(':', $additionalHeader, 2); |
|
36 | + $key = str_replace('-', '_', strtoupper(trim($key))); |
|
37 | + if ($key != 'HOST') { |
|
38 | + $_SERVER['HTTP_' . $key] = $value; |
|
39 | + } |
|
40 | + } |
|
41 | + } |
|
42 | 42 | } |
43 | 43 | |
44 | 44 | |
45 | - // put parsed query parts into $_GET array |
|
45 | + // put parsed query parts into $_GET array |
|
46 | 46 | $urlParts = parse_url($_SERVER['argv'][2]); |
47 | - // Populating $_GET |
|
47 | + // Populating $_GET |
|
48 | 48 | parse_str($urlParts['query'], $_GET); |
49 | - // Populating $_REQUEST |
|
49 | + // Populating $_REQUEST |
|
50 | 50 | parse_str($urlParts['query'], $_REQUEST); |
51 | - // Populating $_POST |
|
51 | + // Populating $_POST |
|
52 | 52 | $_POST = array(); |
53 | - // Populating $_COOKIE |
|
53 | + // Populating $_COOKIE |
|
54 | 54 | $_COOKIE = array(); |
55 | 55 | |
56 | - // Get the TYPO3_SITE_PATH of the website frontend: |
|
56 | + // Get the TYPO3_SITE_PATH of the website frontend: |
|
57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; |
58 | 58 | |
59 | - // faking the environment |
|
59 | + // faking the environment |
|
60 | 60 | $_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); |
61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; |
62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; |
@@ -66,15 +66,15 @@ discard block |
||
66 | 66 | $_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); |
67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; |
68 | 68 | |
69 | - // Define a port if used in the URL: |
|
69 | + // Define a port if used in the URL: |
|
70 | 70 | if (isset($urlParts['port'])) { |
71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
72 | - $_SERVER['SERVER_PORT'] = $urlParts['port']; |
|
71 | + $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
72 | + $_SERVER['SERVER_PORT'] = $urlParts['port']; |
|
73 | 73 | } |
74 | 74 | |
75 | - // Define HTTPS disposal: |
|
75 | + // Define HTTPS disposal: |
|
76 | 76 | if ($urlParts['scheme'] === 'https') { |
77 | - $_SERVER['HTTPS'] = 'on'; |
|
77 | + $_SERVER['HTTPS'] = 'on'; |
|
78 | 78 | } |
79 | 79 | |
80 | 80 | chdir($typo3Root); |
@@ -88,11 +88,11 @@ discard block |
||
88 | 88 | * @return boolean |
89 | 89 | */ |
90 | 90 | function isAbsPath($path) { |
91 | - // on Windows also a path starting with a drive letter is absolute: X:/ |
|
92 | - if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { |
|
93 | - return TRUE; |
|
94 | - } |
|
91 | + // on Windows also a path starting with a drive letter is absolute: X:/ |
|
92 | + if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { |
|
93 | + return TRUE; |
|
94 | + } |
|
95 | 95 | |
96 | - // path starting with a / is always absolute, on every system |
|
97 | - return (substr($path, 0, 1) === '/'); |
|
96 | + // path starting with a / is always absolute, on every system |
|
97 | + return (substr($path, 0, 1) === '/'); |
|
98 | 98 | } |
@@ -12,12 +12,12 @@ discard block |
||
12 | 12 | if (!isAbsPath($tempPathThisScript)) { |
13 | 13 | $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
14 | 14 | if ($workingDirectory) { |
15 | - $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
15 | + $tempPathThisScript = $workingDirectory.'/'.preg_replace('/\.\//', '', $tempPathThisScript); |
|
16 | 16 | if (!@is_file($tempPathThisScript)) { |
17 | - die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
17 | + die('Relative path found, but an error occured during resolving of the absolute path: '.$tempPathThisScript.PHP_EOL); |
|
18 | 18 | } |
19 | 19 | } else { |
20 | - die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
20 | + die('Relative path found, but resolving absolute path is not supported on this platform.'.PHP_EOL); |
|
21 | 21 | } |
22 | 22 | } |
23 | 23 | |
@@ -35,7 +35,7 @@ discard block |
||
35 | 35 | list($key, $value) = explode(':', $additionalHeader, 2); |
36 | 36 | $key = str_replace('-', '_', strtoupper(trim($key))); |
37 | 37 | if ($key != 'HOST') { |
38 | - $_SERVER['HTTP_' . $key] = $value; |
|
38 | + $_SERVER['HTTP_'.$key] = $value; |
|
39 | 39 | } |
40 | 40 | } |
41 | 41 | } |
@@ -57,18 +57,18 @@ discard block |
||
57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; |
58 | 58 | |
59 | 59 | // faking the environment |
60 | -$_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); |
|
60 | +$_SERVER['DOCUMENT_ROOT'] = preg_replace('#'.preg_quote($typo3SitePath, '#').'$#', '', $typo3Root); |
|
61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; |
62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; |
63 | -$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath . 'index.php'; |
|
64 | -$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root . 'index.php'; |
|
63 | +$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath.'index.php'; |
|
64 | +$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root.'index.php'; |
|
65 | 65 | $_SERVER['QUERY_STRING'] = (isset($urlParts['query']) ? $urlParts['query'] : ''); |
66 | -$_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); |
|
66 | +$_SERVER['REQUEST_URI'] = $urlParts['path'].(isset($urlParts['query']) ? '?'.$urlParts['query'] : ''); |
|
67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; |
68 | 68 | |
69 | 69 | // Define a port if used in the URL: |
70 | 70 | if (isset($urlParts['port'])) { |
71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
71 | + $_SERVER['HTTP_HOST'] .= ':'.$urlParts['port']; |
|
72 | 72 | $_SERVER['SERVER_PORT'] = $urlParts['port']; |
73 | 73 | } |
74 | 74 | |
@@ -78,7 +78,7 @@ discard block |
||
78 | 78 | } |
79 | 79 | |
80 | 80 | chdir($typo3Root); |
81 | -include($typo3Root . '/index.php'); |
|
81 | +include($typo3Root.'/index.php'); |
|
82 | 82 | |
83 | 83 | |
84 | 84 | /** |
@@ -37,30 +37,30 @@ |
||
37 | 37 | */ |
38 | 38 | class tx_crawler_cli_flush extends \TYPO3\CMS\Core\Controller\CommandLineController { |
39 | 39 | |
40 | - /** |
|
41 | - * Constructor |
|
42 | - * |
|
43 | - * @return void |
|
44 | - */ |
|
45 | - function __construct() { |
|
46 | - parent::__construct(); |
|
40 | + /** |
|
41 | + * Constructor |
|
42 | + * |
|
43 | + * @return void |
|
44 | + */ |
|
45 | + function __construct() { |
|
46 | + parent::__construct(); |
|
47 | 47 | |
48 | - // Adding options to help archive: |
|
49 | - $this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); |
|
50 | - # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
48 | + // Adding options to help archive: |
|
49 | + $this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); |
|
50 | + # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
51 | 51 | |
52 | - // Setting help texts: |
|
53 | - $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; |
|
54 | - $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
55 | - $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; |
|
56 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; |
|
57 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; |
|
58 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
59 | - } |
|
52 | + // Setting help texts: |
|
53 | + $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; |
|
54 | + $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
55 | + $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; |
|
56 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; |
|
57 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; |
|
58 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
59 | + } |
|
60 | 60 | } |
61 | 61 | |
62 | 62 | if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']) { |
63 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); |
|
63 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); |
|
64 | 64 | } |
65 | 65 | |
66 | 66 | ?> |
@@ -1149,7 +1149,7 @@ discard block |
||
1149 | 1149 | return time(); |
1150 | 1150 | } |
1151 | 1151 | |
1152 | - /************************************ |
|
1152 | + /************************************ |
|
1153 | 1153 | * |
1154 | 1154 | * URL reading |
1155 | 1155 | * |
@@ -1316,7 +1316,7 @@ discard block |
||
1316 | 1316 | return FALSE; |
1317 | 1317 | } |
1318 | 1318 | |
1319 | - // direct request |
|
1319 | + // direct request |
|
1320 | 1320 | if ($this->extensionSettings['makeDirectRequests']) { |
1321 | 1321 | $result = $this->sendDirectRequest($originalUrl, $crawlerId); |
1322 | 1322 | return $result; |
@@ -2328,7 +2328,7 @@ discard block |
||
2328 | 2328 | * |
2329 | 2329 | * @return void |
2330 | 2330 | */ |
2331 | - public function CLI_deleteProcessesMarkedDeleted() { |
|
2331 | + public function CLI_deleteProcessesMarkedDeleted() { |
|
2332 | 2332 | $this->db->exec_DELETEquery('tx_crawler_process', 'deleted = 1'); |
2333 | 2333 | } |
2334 | 2334 |
@@ -640,7 +640,9 @@ discard block |
||
640 | 640 | $sets = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.']; |
641 | 641 | if(is_array($sets)) { |
642 | 642 | foreach($sets as $key=>$value) { |
643 | - if(!is_array($value)) continue; |
|
643 | + if(!is_array($value)) { |
|
644 | + continue; |
|
645 | + } |
|
644 | 646 | $configurationsForBranch[] = substr($key,-1)=='.'?substr($key,0,-1):$key; |
645 | 647 | } |
646 | 648 | } |
@@ -987,7 +989,9 @@ discard block |
||
987 | 989 | */ |
988 | 990 | public function addQueueEntry_callBack($setId,$params,$callBack,$page_id=0,$schedule=0) { |
989 | 991 | |
990 | - if (!is_array($params)) $params = array(); |
|
992 | + if (!is_array($params)) { |
|
993 | + $params = array(); |
|
994 | + } |
|
991 | 995 | $params['_CALLBACKOBJ'] = $callBack; |
992 | 996 | |
993 | 997 | // Compile value array: |
@@ -1079,7 +1083,7 @@ discard block |
||
1079 | 1083 | $rows[] = $uid; |
1080 | 1084 | $urlAdded = true; |
1081 | 1085 | tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue',$this->setID,array('uid' => $uid, 'fieldArray' => $fieldArray)); |
1082 | - }else{ |
|
1086 | + } else{ |
|
1083 | 1087 | tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue',$this->setID,array('rows' => $rows, 'fieldArray' => $fieldArray)); |
1084 | 1088 | } |
1085 | 1089 | } |
@@ -1108,7 +1112,7 @@ discard block |
||
1108 | 1112 | $timeBegin = $currentTime - 100; |
1109 | 1113 | $timeEnd = $currentTime + 100; |
1110 | 1114 | $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '. $currentTime.') '; |
1111 | - }else{ |
|
1115 | + } else{ |
|
1112 | 1116 | $where = 'scheduled <= ' . $currentTime; |
1113 | 1117 | } |
1114 | 1118 | } elseif ($tstamp > $currentTime) { |
@@ -1301,18 +1305,24 @@ discard block |
||
1301 | 1305 | */ |
1302 | 1306 | public function requestUrl($originalUrl, $crawlerId, $timeout=2, $recursion=10) { |
1303 | 1307 | |
1304 | - if (!$recursion) return false; |
|
1308 | + if (!$recursion) { |
|
1309 | + return false; |
|
1310 | + } |
|
1305 | 1311 | |
1306 | 1312 | // Parse URL, checking for scheme: |
1307 | 1313 | $url = parse_url($originalUrl); |
1308 | 1314 | |
1309 | 1315 | if ($url === FALSE) { |
1310 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Could not parse_url() for string "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1316 | + if (TYPO3_DLOG) { |
|
1317 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Could not parse_url() for string "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1318 | + } |
|
1311 | 1319 | return FALSE; |
1312 | 1320 | } |
1313 | 1321 | |
1314 | 1322 | if (!in_array($url['scheme'], array('','http','https'))) { |
1315 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1323 | + if (TYPO3_DLOG) { |
|
1324 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1325 | + } |
|
1316 | 1326 | return FALSE; |
1317 | 1327 | } |
1318 | 1328 | |
@@ -1346,7 +1356,9 @@ discard block |
||
1346 | 1356 | $fp = fsockopen($host, $port, $errno, $errstr, $timeout); |
1347 | 1357 | |
1348 | 1358 | if (!$fp) { |
1349 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1359 | + if (TYPO3_DLOG) { |
|
1360 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1361 | + } |
|
1350 | 1362 | return FALSE; |
1351 | 1363 | } else { |
1352 | 1364 | // Request message: |
@@ -1374,7 +1386,9 @@ discard block |
||
1374 | 1386 | if (is_array($newRequestUrl)) { |
1375 | 1387 | $result = array_merge(array('parentRequest'=>$result), $newRequestUrl); |
1376 | 1388 | } else { |
1377 | - if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1389 | + if (TYPO3_DLOG) { |
|
1390 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Error while opening "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
|
1391 | + } |
|
1378 | 1392 | return FALSE; |
1379 | 1393 | } |
1380 | 1394 | } |
@@ -1500,20 +1514,32 @@ discard block |
||
1500 | 1514 | * @return string URL from redirection |
1501 | 1515 | */ |
1502 | 1516 | protected function getRequestUrlFrom302Header($headers,$user='',$pass='') { |
1503 | - if(!is_array($headers)) return false; |
|
1504 | - if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) return false; |
|
1517 | + if(!is_array($headers)) { |
|
1518 | + return false; |
|
1519 | + } |
|
1520 | + if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) { |
|
1521 | + return false; |
|
1522 | + } |
|
1505 | 1523 | |
1506 | 1524 | foreach($headers as $hl) { |
1507 | 1525 | $tmp = explode(": ",$hl); |
1508 | 1526 | $header[trim($tmp[0])] = trim($tmp[1]); |
1509 | - if(trim($tmp[0])=='Location') break; |
|
1527 | + if(trim($tmp[0])=='Location') { |
|
1528 | + break; |
|
1529 | + } |
|
1530 | + } |
|
1531 | + if(!array_key_exists('Location',$header)) { |
|
1532 | + return false; |
|
1510 | 1533 | } |
1511 | - if(!array_key_exists('Location',$header)) return false; |
|
1512 | 1534 | |
1513 | 1535 | if($user!='') { |
1514 | - if(!($tmp = parse_url($header['Location']))) return false; |
|
1536 | + if(!($tmp = parse_url($header['Location']))) { |
|
1537 | + return false; |
|
1538 | + } |
|
1515 | 1539 | $newUrl = $tmp['scheme'] . '://' . $user . ':' . $pass . '@' . $tmp['host'] . $tmp['path']; |
1516 | - if($tmp['query']!='') $newUrl .= '?' . $tmp['query']; |
|
1540 | + if($tmp['query']!='') { |
|
1541 | + $newUrl .= '?' . $tmp['query']; |
|
1542 | + } |
|
1517 | 1543 | } else { |
1518 | 1544 | $newUrl = $header['Location']; |
1519 | 1545 | } |
@@ -1942,7 +1968,7 @@ discard block |
||
1942 | 1968 | $configurations = $this->getUrlsForPageId($pageId); |
1943 | 1969 | if(is_array($configurations)){ |
1944 | 1970 | $configurationKeys = array_keys($configurations); |
1945 | - }else{ |
|
1971 | + } else{ |
|
1946 | 1972 | $configurationKeys = array(); |
1947 | 1973 | } |
1948 | 1974 | } |
@@ -2274,7 +2300,9 @@ discard block |
||
2274 | 2300 | return false; //nothing to release |
2275 | 2301 | } |
2276 | 2302 | |
2277 | - if(!$withinLock) $this->db->sql_query('BEGIN'); |
|
2303 | + if(!$withinLock) { |
|
2304 | + $this->db->sql_query('BEGIN'); |
|
2305 | + } |
|
2278 | 2306 | |
2279 | 2307 | // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup |
2280 | 2308 | // this ensures that a single process can't mess up the entire process table |
@@ -2318,7 +2346,9 @@ discard block |
||
2318 | 2346 | ) |
2319 | 2347 | ); |
2320 | 2348 | |
2321 | - if(!$withinLock) $this->db->sql_query('COMMIT'); |
|
2349 | + if(!$withinLock) { |
|
2350 | + $this->db->sql_query('COMMIT'); |
|
2351 | + } |
|
2322 | 2352 | |
2323 | 2353 | return true; |
2324 | 2354 | } |
@@ -29,8 +29,8 @@ discard block |
||
29 | 29 | class tx_crawler_lib { |
30 | 30 | |
31 | 31 | var $setID = 0; |
32 | - var $processID =''; |
|
33 | - var $max_CLI_exec_time = 3600; // One hour is max stalled time for the CLI (If the process has had the status "start" for 3600 seconds it will be regarded stalled and a new process is started. |
|
32 | + var $processID = ''; |
|
33 | + var $max_CLI_exec_time = 3600; // One hour is max stalled time for the CLI (If the process has had the status "start" for 3600 seconds it will be regarded stalled and a new process is started. |
|
34 | 34 | |
35 | 35 | var $duplicateTrack = array(); |
36 | 36 | var $downloadUrls = array(); |
@@ -43,9 +43,9 @@ discard block |
||
43 | 43 | var $queueEntries = array(); |
44 | 44 | var $urlList = array(); |
45 | 45 | |
46 | - var $debugMode=FALSE; |
|
46 | + var $debugMode = FALSE; |
|
47 | 47 | |
48 | - var $extensionSettings=array(); |
|
48 | + var $extensionSettings = array(); |
|
49 | 49 | |
50 | 50 | var $MP = false; // mount point |
51 | 51 | |
@@ -69,9 +69,9 @@ discard block |
||
69 | 69 | private $backendUser; |
70 | 70 | |
71 | 71 | const CLI_STATUS_NOTHING_PROCCESSED = 0; |
72 | - const CLI_STATUS_REMAIN = 1; //queue not empty |
|
73 | - const CLI_STATUS_PROCESSED = 2; //(some) queue items where processed |
|
74 | - const CLI_STATUS_ABORTED = 4; //instance didn't finish |
|
72 | + const CLI_STATUS_REMAIN = 1; //queue not empty |
|
73 | + const CLI_STATUS_PROCESSED = 2; //(some) queue items where processed |
|
74 | + const CLI_STATUS_ABORTED = 4; //instance didn't finish |
|
75 | 75 | const CLI_STATUS_POLLABLE_PROCESSED = 8; |
76 | 76 | |
77 | 77 | /** |
@@ -162,7 +162,7 @@ discard block |
||
162 | 162 | $this->extensionSettings['countInARun'] = 100; |
163 | 163 | } |
164 | 164 | |
165 | - $this->extensionSettings['processLimit'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['processLimit'],1,99,1); |
|
165 | + $this->extensionSettings['processLimit'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['processLimit'], 1, 99, 1); |
|
166 | 166 | } |
167 | 167 | |
168 | 168 | /** |
@@ -195,7 +195,7 @@ discard block |
||
195 | 195 | } |
196 | 196 | |
197 | 197 | if (!$skipPage) { |
198 | - if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList('3,4', $pageRow['doktype']) || $pageRow['doktype']>=199) { |
|
198 | + if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList('3,4', $pageRow['doktype']) || $pageRow['doktype'] >= 199) { |
|
199 | 199 | $skipPage = true; |
200 | 200 | $skipMessage = 'Because doktype is not allowed'; |
201 | 201 | } |
@@ -216,13 +216,13 @@ discard block |
||
216 | 216 | if (!$skipPage) { |
217 | 217 | // veto hook |
218 | 218 | if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'])) { |
219 | - foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'] as $key => $func) { |
|
219 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'] as $key => $func) { |
|
220 | 220 | $params = array( |
221 | 221 | 'pageRow' => $pageRow |
222 | 222 | ); |
223 | 223 | // expects "false" if page is ok and "true" or a skipMessage if this page should _not_ be crawled |
224 | 224 | $veto = \TYPO3\CMS\Core\Utility\GeneralUtility::callUserFunction($func, $params, $this); |
225 | - if ($veto !== false) { |
|
225 | + if ($veto !== false) { |
|
226 | 226 | $skipPage = true; |
227 | 227 | if (is_string($veto)) { |
228 | 228 | $skipMessage = $veto; |
@@ -271,9 +271,9 @@ discard block |
||
271 | 271 | * @param string $configurationHash |
272 | 272 | * @return boolean |
273 | 273 | */ |
274 | - protected function noUnprocessedQueueEntriesForPageWithConfigurationHashExist($uid,$configurationHash) { |
|
275 | - $configurationHash = $this->db->fullQuoteStr($configurationHash,'tx_crawler_queue'); |
|
276 | - $res = $this->db->exec_SELECTquery('count(*) as anz','tx_crawler_queue',"page_id=".intval($uid)." AND configuration_hash=".$configurationHash." AND exec_time=0"); |
|
274 | + protected function noUnprocessedQueueEntriesForPageWithConfigurationHashExist($uid, $configurationHash) { |
|
275 | + $configurationHash = $this->db->fullQuoteStr($configurationHash, 'tx_crawler_queue'); |
|
276 | + $res = $this->db->exec_SELECTquery('count(*) as anz', 'tx_crawler_queue', "page_id=".intval($uid)." AND configuration_hash=".$configurationHash." AND exec_time=0"); |
|
277 | 277 | $row = $this->db->sql_fetch_assoc($res); |
278 | 278 | |
279 | 279 | return ($row['anz'] == 0); |
@@ -338,26 +338,26 @@ discard block |
||
338 | 338 | } |
339 | 339 | } |
340 | 340 | |
341 | - if (is_array($vv['URLs'])) { |
|
342 | - $configurationHash = md5(serialize($vv)); |
|
343 | - $skipInnerCheck = $this->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageRow['uid'],$configurationHash); |
|
341 | + if (is_array($vv['URLs'])) { |
|
342 | + $configurationHash = md5(serialize($vv)); |
|
343 | + $skipInnerCheck = $this->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageRow['uid'], $configurationHash); |
|
344 | 344 | |
345 | - foreach($vv['URLs'] as $urlQuery) { |
|
345 | + foreach ($vv['URLs'] as $urlQuery) { |
|
346 | 346 | |
347 | - if ($this->drawURLs_PIfilter($vv['subCfg']['procInstrFilter'], $incomingProcInstructions)) { |
|
347 | + if ($this->drawURLs_PIfilter($vv['subCfg']['procInstrFilter'], $incomingProcInstructions)) { |
|
348 | 348 | |
349 | 349 | // Calculate cHash: |
350 | - if ($vv['subCfg']['cHash']) { |
|
350 | + if ($vv['subCfg']['cHash']) { |
|
351 | 351 | /* @var $cacheHash \TYPO3\CMS\Frontend\Page\CacheHashCalculator */ |
352 | 352 | $cacheHash = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Frontend\Page\CacheHashCalculator'); |
353 | - $urlQuery .= '&cHash=' . $cacheHash->generateForParameters($urlQuery); |
|
353 | + $urlQuery .= '&cHash='.$cacheHash->generateForParameters($urlQuery); |
|
354 | 354 | } |
355 | 355 | |
356 | 356 | // Create key by which to determine unique-ness: |
357 | 357 | $uKey = $urlQuery.'|'.$vv['subCfg']['userGroups'].'|'.$vv['subCfg']['baseUrl'].'|'.$vv['subCfg']['procInstrFilter']; |
358 | 358 | |
359 | 359 | // realurl support (thanks to Ingo Renner) |
360 | - $urlQuery = 'index.php' . $urlQuery; |
|
360 | + $urlQuery = 'index.php'.$urlQuery; |
|
361 | 361 | if (\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded('realurl') && $vv['subCfg']['realurl']) { |
362 | 362 | $params = array( |
363 | 363 | 'LD' => array( |
@@ -370,8 +370,8 @@ discard block |
||
370 | 370 | } |
371 | 371 | |
372 | 372 | // Scheduled time: |
373 | - $schTime = $scheduledTime + round(count($duplicateTrack)*(60/$reqMinute)); |
|
374 | - $schTime = floor($schTime/60)*60; |
|
373 | + $schTime = $scheduledTime + round(count($duplicateTrack) * (60 / $reqMinute)); |
|
374 | + $schTime = floor($schTime / 60) * 60; |
|
375 | 375 | |
376 | 376 | if (isset($duplicateTrack[$uKey])) { |
377 | 377 | |
@@ -383,10 +383,10 @@ discard block |
||
383 | 383 | $urlList = '['.date('d.m.y H:i', $schTime).'] '.htmlspecialchars($urlQuery); |
384 | 384 | $this->urlList[] = '['.date('d.m.y H:i', $schTime).'] '.$urlQuery; |
385 | 385 | |
386 | - $theUrl = ($vv['subCfg']['baseUrl'] ? $vv['subCfg']['baseUrl'] : \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_URL')) . $urlQuery; |
|
386 | + $theUrl = ($vv['subCfg']['baseUrl'] ? $vv['subCfg']['baseUrl'] : \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_URL')).$urlQuery; |
|
387 | 387 | |
388 | 388 | // Submit for crawling! |
389 | - if ($submitCrawlUrls) { |
|
389 | + if ($submitCrawlUrls) { |
|
390 | 390 | $added = $this->addUrl( |
391 | 391 | $pageRow['uid'], |
392 | 392 | $theUrl, |
@@ -398,7 +398,7 @@ discard block |
||
398 | 398 | if ($added === false) { |
399 | 399 | $urlList .= ' (Url already existed)'; |
400 | 400 | } |
401 | - } elseif ($downloadCrawlUrls) { |
|
401 | + } elseif ($downloadCrawlUrls) { |
|
402 | 402 | $downloadUrls[$theUrl] = $theUrl; |
403 | 403 | } |
404 | 404 | |
@@ -427,7 +427,7 @@ discard block |
||
427 | 427 | return TRUE; |
428 | 428 | } |
429 | 429 | |
430 | - foreach($incomingProcInstructions as $pi) { |
|
430 | + foreach ($incomingProcInstructions as $pi) { |
|
431 | 431 | if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList($piString, $pi)) { |
432 | 432 | return TRUE; |
433 | 433 | } |
@@ -440,7 +440,7 @@ discard block |
||
440 | 440 | * @return array |
441 | 441 | */ |
442 | 442 | public function getPageTSconfigForId($id) { |
443 | - if(!$this->MP){ |
|
443 | + if (!$this->MP) { |
|
444 | 444 | $pageTSconfig = \TYPO3\CMS\Backend\Utility\BackendUtility::getPagesTSconfig($id); |
445 | 445 | } else { |
446 | 446 | list(,$mountPointId) = explode('-', $this->MP); |
@@ -468,7 +468,7 @@ discard block |
||
468 | 468 | * @param integer $id Page ID |
469 | 469 | * @return array Configurations from pages and configuration records |
470 | 470 | */ |
471 | - protected function getUrlsForPageId($id) { |
|
471 | + protected function getUrlsForPageId($id) { |
|
472 | 472 | |
473 | 473 | /** |
474 | 474 | * Get configuration from tsConfig |
@@ -479,24 +479,24 @@ discard block |
||
479 | 479 | |
480 | 480 | $res = array(); |
481 | 481 | |
482 | - if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.'])) { |
|
482 | + if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.'])) { |
|
483 | 483 | $crawlerCfg = $pageTSconfig['tx_crawler.']['crawlerCfg.']; |
484 | 484 | |
485 | - if (is_array($crawlerCfg['paramSets.'])) { |
|
486 | - foreach($crawlerCfg['paramSets.'] as $key => $values) { |
|
487 | - if (!is_array($values)) { |
|
485 | + if (is_array($crawlerCfg['paramSets.'])) { |
|
486 | + foreach ($crawlerCfg['paramSets.'] as $key => $values) { |
|
487 | + if (!is_array($values)) { |
|
488 | 488 | |
489 | 489 | // Sub configuration for a single configuration string: |
490 | - $subCfg = (array)$crawlerCfg['paramSets.'][$key.'.']; |
|
490 | + $subCfg = (array) $crawlerCfg['paramSets.'][$key.'.']; |
|
491 | 491 | $subCfg['key'] = $key; |
492 | 492 | |
493 | - if (strcmp($subCfg['procInstrFilter'],'')) { |
|
494 | - $subCfg['procInstrFilter'] = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['procInstrFilter'])); |
|
493 | + if (strcmp($subCfg['procInstrFilter'], '')) { |
|
494 | + $subCfg['procInstrFilter'] = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['procInstrFilter'])); |
|
495 | 495 | } |
496 | - $pidOnlyList = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['pidsOnly'],1)); |
|
496 | + $pidOnlyList = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['pidsOnly'], 1)); |
|
497 | 497 | |
498 | 498 | // process configuration if it is not page-specific or if the specific page is the current page: |
499 | - if (!strcmp($subCfg['pidsOnly'],'') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList,$id)) { |
|
499 | + if (!strcmp($subCfg['pidsOnly'], '') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList, $id)) { |
|
500 | 500 | |
501 | 501 | // add trailing slash if not present |
502 | 502 | if (!empty($subCfg['baseUrl']) && substr($subCfg['baseUrl'], -1) != '/') { |
@@ -507,14 +507,14 @@ discard block |
||
507 | 507 | $res[$key] = array(); |
508 | 508 | $res[$key]['subCfg'] = $subCfg; |
509 | 509 | $res[$key]['paramParsed'] = $this->parseParams($values); |
510 | - $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'],$id); |
|
510 | + $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id); |
|
511 | 511 | $res[$key]['origin'] = 'pagets'; |
512 | 512 | |
513 | 513 | // recognize MP value |
514 | - if(!$this->MP){ |
|
515 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'],array('?id='.$id)); |
|
514 | + if (!$this->MP) { |
|
515 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id)); |
|
516 | 516 | } else { |
517 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'],array('?id='.$id.'&MP='.$this->MP)); |
|
517 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id.'&MP='.$this->MP)); |
|
518 | 518 | } |
519 | 519 | } |
520 | 520 | } |
@@ -535,7 +535,7 @@ discard block |
||
535 | 535 | 'tx_crawler_configuration', |
536 | 536 | 'pid', |
537 | 537 | intval($page['uid']), |
538 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration') . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration') |
|
538 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration').\TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration') |
|
539 | 539 | ); |
540 | 540 | |
541 | 541 | if (is_array($configurationRecordsForCurrentPage)) { |
@@ -544,10 +544,10 @@ discard block |
||
544 | 544 | // check access to the configuration record |
545 | 545 | if (empty($configurationRecord['begroups']) || $GLOBALS['BE_USER']->isAdmin() || $this->hasGroupAccess($GLOBALS['BE_USER']->user['usergroup_cached_list'], $configurationRecord['begroups'])) { |
546 | 546 | |
547 | - $pidOnlyList = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$configurationRecord['pidsonly'],1)); |
|
547 | + $pidOnlyList = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $configurationRecord['pidsonly'], 1)); |
|
548 | 548 | |
549 | 549 | // process configuration if it is not page-specific or if the specific page is the current page: |
550 | - if (!strcmp($configurationRecord['pidsonly'],'') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList,$id)) { |
|
550 | + if (!strcmp($configurationRecord['pidsonly'], '') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList, $id)) { |
|
551 | 551 | $key = $configurationRecord['name']; |
552 | 552 | |
553 | 553 | // don't overwrite previously defined paramSets |
@@ -577,7 +577,7 @@ discard block |
||
577 | 577 | $res[$key]['subCfg'] = $subCfg; |
578 | 578 | $res[$key]['paramParsed'] = $this->parseParams($configurationRecord['configuration']); |
579 | 579 | $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id); |
580 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id=' . $id)); |
|
580 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id)); |
|
581 | 581 | $res[$key]['origin'] = 'tx_crawler_configuration_'.$configurationRecord['uid']; |
582 | 582 | } |
583 | 583 | } |
@@ -587,8 +587,8 @@ discard block |
||
587 | 587 | } |
588 | 588 | } |
589 | 589 | |
590 | - if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'])) { |
|
591 | - foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] as $func) { |
|
590 | + if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'])) { |
|
591 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] as $func) { |
|
592 | 592 | $params = array( |
593 | 593 | 'res' => &$res, |
594 | 594 | ); |
@@ -613,8 +613,8 @@ discard block |
||
613 | 613 | $res = $this->db->exec_SELECTquery( |
614 | 614 | '*', |
615 | 615 | 'sys_domain', |
616 | - 'uid = '.$sysDomainUid . |
|
617 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('sys_domain') . |
|
616 | + 'uid = '.$sysDomainUid. |
|
617 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('sys_domain'). |
|
618 | 618 | \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('sys_domain') |
619 | 619 | ); |
620 | 620 | $row = $this->db->sql_fetch_assoc($res); |
@@ -638,24 +638,24 @@ discard block |
||
638 | 638 | $pageTSconfig = $this->getPageTSconfigForId($rootId); |
639 | 639 | if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'])) { |
640 | 640 | $sets = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.']; |
641 | - if(is_array($sets)) { |
|
642 | - foreach($sets as $key=>$value) { |
|
643 | - if(!is_array($value)) continue; |
|
644 | - $configurationsForBranch[] = substr($key,-1)=='.'?substr($key,0,-1):$key; |
|
641 | + if (is_array($sets)) { |
|
642 | + foreach ($sets as $key=>$value) { |
|
643 | + if (!is_array($value)) continue; |
|
644 | + $configurationsForBranch[] = substr($key, -1) == '.' ?substr($key, 0, -1) : $key; |
|
645 | 645 | } |
646 | 646 | } |
647 | 647 | } |
648 | 648 | $pids = array(); |
649 | 649 | $rootLine = \TYPO3\CMS\Backend\Utility\BackendUtility::BEgetRootLine($rootId); |
650 | - foreach($rootLine as $node) { |
|
650 | + foreach ($rootLine as $node) { |
|
651 | 651 | $pids[] = $node['uid']; |
652 | 652 | } |
653 | 653 | /* @var \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
654 | 654 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
655 | 655 | $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1); |
656 | - $tree->init('AND ' . $perms_clause); |
|
656 | + $tree->init('AND '.$perms_clause); |
|
657 | 657 | $tree->getTree($rootId, $depth, ''); |
658 | - foreach($tree->tree as $node) { |
|
658 | + foreach ($tree->tree as $node) { |
|
659 | 659 | $pids[] = $node['row']['uid']; |
660 | 660 | } |
661 | 661 | |
@@ -663,12 +663,12 @@ discard block |
||
663 | 663 | '*', |
664 | 664 | 'tx_crawler_configuration', |
665 | 665 | 'pid IN ('.implode(',', $pids).') '. |
666 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration') . |
|
666 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration'). |
|
667 | 667 | \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration').' '. |
668 | 668 | \TYPO3\CMS\Backend\Utility\BackendUtility::versioningPlaceholderClause('tx_crawler_configuration').' ' |
669 | 669 | ); |
670 | 670 | |
671 | - while($row = $this->db->sql_fetch_assoc($res)) { |
|
671 | + while ($row = $this->db->sql_fetch_assoc($res)) { |
|
672 | 672 | $configurationsForBranch[] = $row['name']; |
673 | 673 | } |
674 | 674 | $this->db->sql_free_result($res); |
@@ -690,7 +690,7 @@ discard block |
||
690 | 690 | if (empty($accessList)) { |
691 | 691 | return true; |
692 | 692 | } |
693 | - foreach(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $groupList) as $groupUid) { |
|
693 | + foreach (\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $groupList) as $groupUid) { |
|
694 | 694 | if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList($accessList, $groupUid)) { |
695 | 695 | return true; |
696 | 696 | } |
@@ -709,9 +709,9 @@ discard block |
||
709 | 709 | $paramKeyValues = array(); |
710 | 710 | $GETparams = explode('&', $inputQuery); |
711 | 711 | |
712 | - foreach($GETparams as $paramAndValue) { |
|
713 | - list($p,$v) = explode('=', $paramAndValue, 2); |
|
714 | - if (strlen($p)) { |
|
712 | + foreach ($GETparams as $paramAndValue) { |
|
713 | + list($p, $v) = explode('=', $paramAndValue, 2); |
|
714 | + if (strlen($p)) { |
|
715 | 715 | $paramKeyValues[rawurldecode($p)] = rawurldecode($v); |
716 | 716 | } |
717 | 717 | } |
@@ -734,84 +734,84 @@ discard block |
||
734 | 734 | * @param integer Current page ID |
735 | 735 | * @return array Array with key (GET var name) with the value being an array of all possible values for that key. |
736 | 736 | */ |
737 | - protected function expandParameters($paramArray, $pid) { |
|
737 | + protected function expandParameters($paramArray, $pid) { |
|
738 | 738 | global $TCA; |
739 | 739 | |
740 | 740 | // Traverse parameter names: |
741 | - foreach($paramArray as $p => $v) { |
|
741 | + foreach ($paramArray as $p => $v) { |
|
742 | 742 | $v = trim($v); |
743 | 743 | |
744 | 744 | // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal |
745 | - if (substr($v,0,1)==='[' && substr($v,-1)===']') { |
|
745 | + if (substr($v, 0, 1) === '[' && substr($v, -1) === ']') { |
|
746 | 746 | // So, find the value inside brackets and reset the paramArray value as an array. |
747 | - $v = substr($v,1,-1); |
|
747 | + $v = substr($v, 1, -1); |
|
748 | 748 | $paramArray[$p] = array(); |
749 | 749 | |
750 | 750 | // Explode parts and traverse them: |
751 | - $parts = explode('|',$v); |
|
752 | - foreach($parts as $pV) { |
|
751 | + $parts = explode('|', $v); |
|
752 | + foreach ($parts as $pV) { |
|
753 | 753 | |
754 | 754 | // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30) |
755 | - if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/',trim($pV),$reg)) { // Integer range: |
|
755 | + if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($pV), $reg)) { // Integer range: |
|
756 | 756 | |
757 | 757 | // Swap if first is larger than last: |
758 | - if ($reg[1] > $reg[2]) { |
|
758 | + if ($reg[1] > $reg[2]) { |
|
759 | 759 | $temp = $reg[2]; |
760 | 760 | $reg[2] = $reg[1]; |
761 | 761 | $reg[1] = $temp; |
762 | 762 | } |
763 | 763 | |
764 | 764 | // Traverse range, add values: |
765 | - $runAwayBrake = 1000; // Limit to size of range! |
|
766 | - for($a=$reg[1]; $a<=$reg[2];$a++) { |
|
765 | + $runAwayBrake = 1000; // Limit to size of range! |
|
766 | + for ($a = $reg[1]; $a <= $reg[2]; $a++) { |
|
767 | 767 | $paramArray[$p][] = $a; |
768 | 768 | $runAwayBrake--; |
769 | - if ($runAwayBrake<=0) { |
|
769 | + if ($runAwayBrake <= 0) { |
|
770 | 770 | break; |
771 | 771 | } |
772 | 772 | } |
773 | - } elseif (substr(trim($pV),0,7)=='_TABLE:') { |
|
773 | + } elseif (substr(trim($pV), 0, 7) == '_TABLE:') { |
|
774 | 774 | |
775 | 775 | // Parse parameters: |
776 | - $subparts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(';',$pV); |
|
776 | + $subparts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(';', $pV); |
|
777 | 777 | $subpartParams = array(); |
778 | - foreach($subparts as $spV) { |
|
779 | - list($pKey,$pVal) = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(':',$spV); |
|
778 | + foreach ($subparts as $spV) { |
|
779 | + list($pKey, $pVal) = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(':', $spV); |
|
780 | 780 | $subpartParams[$pKey] = $pVal; |
781 | 781 | } |
782 | 782 | |
783 | 783 | // Table exists: |
784 | - if (isset($TCA[$subpartParams['_TABLE']])) { |
|
784 | + if (isset($TCA[$subpartParams['_TABLE']])) { |
|
785 | 785 | $lookUpPid = isset($subpartParams['_PID']) ? intval($subpartParams['_PID']) : $pid; |
786 | 786 | $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid'; |
787 | 787 | $where = isset($subpartParams['_WHERE']) ? $subpartParams['_WHERE'] : ''; |
788 | 788 | $addTable = isset($subpartParams['_ADDTABLE']) ? $subpartParams['_ADDTABLE'] : ''; |
789 | 789 | |
790 | 790 | $fieldName = $subpartParams['_FIELD'] ? $subpartParams['_FIELD'] : 'uid'; |
791 | - if ($fieldName==='uid' || $TCA[$subpartParams['_TABLE']]['columns'][$fieldName]) { |
|
791 | + if ($fieldName === 'uid' || $TCA[$subpartParams['_TABLE']]['columns'][$fieldName]) { |
|
792 | 792 | |
793 | 793 | $andWhereLanguage = ''; |
794 | 794 | $transOrigPointerField = $TCA[$subpartParams['_TABLE']]['ctrl']['transOrigPointerField']; |
795 | 795 | |
796 | 796 | if ($subpartParams['_ENABLELANG'] && $transOrigPointerField) { |
797 | - $andWhereLanguage = ' AND ' . $this->db->quoteStr($transOrigPointerField, $subpartParams['_TABLE']) .' <= 0 '; |
|
797 | + $andWhereLanguage = ' AND '.$this->db->quoteStr($transOrigPointerField, $subpartParams['_TABLE']).' <= 0 '; |
|
798 | 798 | } |
799 | 799 | |
800 | - $where = $this->db->quoteStr($pidField, $subpartParams['_TABLE']) .'='.intval($lookUpPid) . ' ' . |
|
801 | - $andWhereLanguage . $where; |
|
800 | + $where = $this->db->quoteStr($pidField, $subpartParams['_TABLE']).'='.intval($lookUpPid).' '. |
|
801 | + $andWhereLanguage.$where; |
|
802 | 802 | |
803 | 803 | $rows = $this->db->exec_SELECTgetRows( |
804 | 804 | $fieldName, |
805 | - $subpartParams['_TABLE'] . $addTable, |
|
806 | - $where . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($subpartParams['_TABLE']), |
|
805 | + $subpartParams['_TABLE'].$addTable, |
|
806 | + $where.\TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($subpartParams['_TABLE']), |
|
807 | 807 | '', |
808 | 808 | '', |
809 | 809 | '', |
810 | 810 | $fieldName |
811 | 811 | ); |
812 | 812 | |
813 | - if (is_array($rows)) { |
|
814 | - $paramArray[$p] = array_merge($paramArray[$p],array_keys($rows)); |
|
813 | + if (is_array($rows)) { |
|
814 | + $paramArray[$p] = array_merge($paramArray[$p], array_keys($rows)); |
|
815 | 815 | } |
816 | 816 | } |
817 | 817 | } |
@@ -827,7 +827,7 @@ discard block |
||
827 | 827 | 'currentValue' => $pV, |
828 | 828 | 'pid' => $pid |
829 | 829 | ); |
830 | - foreach($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] as $key => $_funcRef) { |
|
830 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] as $key => $_funcRef) { |
|
831 | 831 | \TYPO3\CMS\Core\Utility\GeneralUtility::callUserFunction($_funcRef, $_params, $this); |
832 | 832 | } |
833 | 833 | } |
@@ -863,11 +863,11 @@ discard block |
||
863 | 863 | |
864 | 864 | // Traverse value set: |
865 | 865 | $newUrls = array(); |
866 | - foreach($urls as $url) { |
|
867 | - foreach($valueSet as $val) { |
|
868 | - $newUrls[] = $url.(strcmp($val,'') ? '&'.rawurlencode($varName).'='.rawurlencode($val) : ''); |
|
866 | + foreach ($urls as $url) { |
|
867 | + foreach ($valueSet as $val) { |
|
868 | + $newUrls[] = $url.(strcmp($val, '') ? '&'.rawurlencode($varName).'='.rawurlencode($val) : ''); |
|
869 | 869 | |
870 | - if (count($newUrls) > \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000)) { |
|
870 | + if (count($newUrls) > \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000)) { |
|
871 | 871 | break; |
872 | 872 | } |
873 | 873 | } |
@@ -897,7 +897,7 @@ discard block |
||
897 | 897 | */ |
898 | 898 | public function getLogEntriesForPageId($id, $filter = '', $doFlush = FALSE, $doFullFlush = FALSE, $itemsPerPage = 10) { |
899 | 899 | // FIXME: Write Unit tests for Filters |
900 | - switch($filter) { |
|
900 | + switch ($filter) { |
|
901 | 901 | case 'pending': |
902 | 902 | $addWhere = ' AND exec_time=0'; |
903 | 903 | break; |
@@ -911,13 +911,13 @@ discard block |
||
911 | 911 | |
912 | 912 | // FIXME: Write unit test that ensures that the right records are deleted. |
913 | 913 | if ($doFlush) { |
914 | - $this->flushQueue( ($doFullFlush?'1=1':('page_id='.intval($id))) .$addWhere); |
|
914 | + $this->flushQueue(($doFullFlush ? '1=1' : ('page_id='.intval($id))).$addWhere); |
|
915 | 915 | return array(); |
916 | 916 | } else { |
917 | 917 | return $this->db->exec_SELECTgetRows('*', |
918 | 918 | 'tx_crawler_queue', |
919 | - 'page_id=' . intval($id) . $addWhere, '', 'scheduled DESC', |
|
920 | - (intval($itemsPerPage)>0 ? intval($itemsPerPage) : '')); |
|
919 | + 'page_id='.intval($id).$addWhere, '', 'scheduled DESC', |
|
920 | + (intval($itemsPerPage) > 0 ? intval($itemsPerPage) : '')); |
|
921 | 921 | } |
922 | 922 | } |
923 | 923 | |
@@ -930,9 +930,9 @@ discard block |
||
930 | 930 | * @param integer Limit the amount of entries per page default is 10 |
931 | 931 | * @return array |
932 | 932 | */ |
933 | - public function getLogEntriesForSetId($set_id,$filter='',$doFlush=FALSE, $doFullFlush=FALSE, $itemsPerPage=10) { |
|
933 | + public function getLogEntriesForSetId($set_id, $filter = '', $doFlush = FALSE, $doFullFlush = FALSE, $itemsPerPage = 10) { |
|
934 | 934 | // FIXME: Write Unit tests for Filters |
935 | - switch($filter) { |
|
935 | + switch ($filter) { |
|
936 | 936 | case 'pending': |
937 | 937 | $addWhere = ' AND exec_time=0'; |
938 | 938 | break; |
@@ -944,14 +944,14 @@ discard block |
||
944 | 944 | break; |
945 | 945 | } |
946 | 946 | // FIXME: Write unit test that ensures that the right records are deleted. |
947 | - if ($doFlush) { |
|
948 | - $this->flushQueue($doFullFlush?'':('set_id='.intval($set_id).$addWhere)); |
|
947 | + if ($doFlush) { |
|
948 | + $this->flushQueue($doFullFlush ? '' : ('set_id='.intval($set_id).$addWhere)); |
|
949 | 949 | return array(); |
950 | 950 | } else { |
951 | 951 | return $this->db->exec_SELECTgetRows('*', |
952 | 952 | 'tx_crawler_queue', |
953 | - 'set_id='.intval($set_id).$addWhere,'','scheduled DESC', |
|
954 | - (intval($itemsPerPage)>0 ? intval($itemsPerPage) : '')); |
|
953 | + 'set_id='.intval($set_id).$addWhere, '', 'scheduled DESC', |
|
954 | + (intval($itemsPerPage) > 0 ? intval($itemsPerPage) : '')); |
|
955 | 955 | } |
956 | 956 | } |
957 | 957 | |
@@ -961,14 +961,14 @@ discard block |
||
961 | 961 | * @param $where SQL related filter for the entries which should be removed |
962 | 962 | * @return void |
963 | 963 | */ |
964 | - protected function flushQueue($where='') { |
|
964 | + protected function flushQueue($where = '') { |
|
965 | 965 | |
966 | - $realWhere = strlen($where)>0?$where:'1=1'; |
|
966 | + $realWhere = strlen($where) > 0 ? $where : '1=1'; |
|
967 | 967 | |
968 | - if(tx_crawler_domain_events_dispatcher::getInstance()->hasObserver('queueEntryFlush')) { |
|
969 | - $groups = $this->db->exec_SELECTgetRows('DISTINCT set_id','tx_crawler_queue',$realWhere); |
|
970 | - foreach($groups as $group) { |
|
971 | - tx_crawler_domain_events_dispatcher::getInstance()->post('queueEntryFlush',$group['set_id'], $this->db->exec_SELECTgetRows('uid, set_id','tx_crawler_queue',$realWhere.' AND set_id="'.$group['set_id'].'"')); |
|
968 | + if (tx_crawler_domain_events_dispatcher::getInstance()->hasObserver('queueEntryFlush')) { |
|
969 | + $groups = $this->db->exec_SELECTgetRows('DISTINCT set_id', 'tx_crawler_queue', $realWhere); |
|
970 | + foreach ($groups as $group) { |
|
971 | + tx_crawler_domain_events_dispatcher::getInstance()->post('queueEntryFlush', $group['set_id'], $this->db->exec_SELECTgetRows('uid, set_id', 'tx_crawler_queue', $realWhere.' AND set_id="'.$group['set_id'].'"')); |
|
972 | 972 | } |
973 | 973 | } |
974 | 974 | |
@@ -985,7 +985,7 @@ discard block |
||
985 | 985 | * @param integer Time at which to activate |
986 | 986 | * @return void |
987 | 987 | */ |
988 | - public function addQueueEntry_callBack($setId,$params,$callBack,$page_id=0,$schedule=0) { |
|
988 | + public function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0) { |
|
989 | 989 | |
990 | 990 | if (!is_array($params)) $params = array(); |
991 | 991 | $params['_CALLBACKOBJ'] = $callBack; |
@@ -1000,7 +1000,7 @@ discard block |
||
1000 | 1000 | 'result_data' => '', |
1001 | 1001 | ); |
1002 | 1002 | |
1003 | - $this->db->exec_INSERTquery('tx_crawler_queue',$fieldArray); |
|
1003 | + $this->db->exec_INSERTquery('tx_crawler_queue', $fieldArray); |
|
1004 | 1004 | } |
1005 | 1005 | |
1006 | 1006 | /************************************ |
@@ -1020,13 +1020,13 @@ discard block |
||
1020 | 1020 | * @param bool (optional) skip inner duplication check |
1021 | 1021 | * @return bool true if the url was added, false if it already existed |
1022 | 1022 | */ |
1023 | - protected function addUrl ( |
|
1023 | + protected function addUrl( |
|
1024 | 1024 | $id, |
1025 | 1025 | $url, |
1026 | 1026 | array $subCfg, |
1027 | 1027 | $tstamp, |
1028 | - $configurationHash='', |
|
1029 | - $skipInnerDuplicationCheck=false |
|
1028 | + $configurationHash = '', |
|
1029 | + $skipInnerDuplicationCheck = false |
|
1030 | 1030 | ) { |
1031 | 1031 | |
1032 | 1032 | $urlAdded = false; |
@@ -1037,14 +1037,14 @@ discard block |
||
1037 | 1037 | ); |
1038 | 1038 | |
1039 | 1039 | // fe user group simulation: |
1040 | - $uGs = implode(',',array_unique(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',',$subCfg['userGroups'],1))); |
|
1041 | - if ($uGs) { |
|
1040 | + $uGs = implode(',', array_unique(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $subCfg['userGroups'], 1))); |
|
1041 | + if ($uGs) { |
|
1042 | 1042 | $parameters['feUserGroupList'] = $uGs; |
1043 | 1043 | } |
1044 | 1044 | |
1045 | 1045 | // Setting processing instructions |
1046 | - $parameters['procInstructions'] = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['procInstrFilter']); |
|
1047 | - if (is_array($subCfg['procInstrParams.'])) { |
|
1046 | + $parameters['procInstructions'] = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']); |
|
1047 | + if (is_array($subCfg['procInstrParams.'])) { |
|
1048 | 1048 | $parameters['procInstrParams'] = $subCfg['procInstrParams.']; |
1049 | 1049 | } |
1050 | 1050 | |
@@ -1063,14 +1063,14 @@ discard block |
||
1063 | 1063 | 'configuration' => $subCfg['key'], |
1064 | 1064 | ); |
1065 | 1065 | |
1066 | - if ($this->registerQueueEntriesInternallyOnly) { |
|
1066 | + if ($this->registerQueueEntriesInternallyOnly) { |
|
1067 | 1067 | //the entries will only be registered and not stored to the database |
1068 | 1068 | $this->queueEntries[] = $fieldArray; |
1069 | 1069 | } else { |
1070 | 1070 | |
1071 | - if(!$skipInnerDuplicationCheck){ |
|
1071 | + if (!$skipInnerDuplicationCheck) { |
|
1072 | 1072 | // check if there is already an equal entry |
1073 | - $rows = $this->getDuplicateRowsIfExist($tstamp,$fieldArray); |
|
1073 | + $rows = $this->getDuplicateRowsIfExist($tstamp, $fieldArray); |
|
1074 | 1074 | } |
1075 | 1075 | |
1076 | 1076 | if (count($rows) == 0) { |
@@ -1078,9 +1078,9 @@ discard block |
||
1078 | 1078 | $uid = $this->db->sql_insert_id(); |
1079 | 1079 | $rows[] = $uid; |
1080 | 1080 | $urlAdded = true; |
1081 | - tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue',$this->setID,array('uid' => $uid, 'fieldArray' => $fieldArray)); |
|
1082 | - }else{ |
|
1083 | - tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue',$this->setID,array('rows' => $rows, 'fieldArray' => $fieldArray)); |
|
1081 | + tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue', $this->setID, array('uid' => $uid, 'fieldArray' => $fieldArray)); |
|
1082 | + } else { |
|
1083 | + tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue', $this->setID, array('rows' => $rows, 'fieldArray' => $fieldArray)); |
|
1084 | 1084 | } |
1085 | 1085 | } |
1086 | 1086 | |
@@ -1097,34 +1097,34 @@ discard block |
||
1097 | 1097 | * |
1098 | 1098 | * @return array; |
1099 | 1099 | */ |
1100 | - protected function getDuplicateRowsIfExist($tstamp,$fieldArray){ |
|
1100 | + protected function getDuplicateRowsIfExist($tstamp, $fieldArray) { |
|
1101 | 1101 | $rows = array(); |
1102 | 1102 | |
1103 | 1103 | $currentTime = $this->getCurrentTime(); |
1104 | 1104 | |
1105 | 1105 | //if this entry is scheduled with "now" |
1106 | 1106 | if ($tstamp <= $currentTime) { |
1107 | - if($this->extensionSettings['enableTimeslot']){ |
|
1107 | + if ($this->extensionSettings['enableTimeslot']) { |
|
1108 | 1108 | $timeBegin = $currentTime - 100; |
1109 | - $timeEnd = $currentTime + 100; |
|
1110 | - $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '. $currentTime.') '; |
|
1111 | - }else{ |
|
1112 | - $where = 'scheduled <= ' . $currentTime; |
|
1109 | + $timeEnd = $currentTime + 100; |
|
1110 | + $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '.$currentTime.') '; |
|
1111 | + } else { |
|
1112 | + $where = 'scheduled <= '.$currentTime; |
|
1113 | 1113 | } |
1114 | 1114 | } elseif ($tstamp > $currentTime) { |
1115 | 1115 | //entries with a timestamp in the future need to have the same schedule time |
1116 | - $where = 'scheduled = ' . $tstamp ; |
|
1116 | + $where = 'scheduled = '.$tstamp; |
|
1117 | 1117 | } |
1118 | 1118 | |
1119 | - if(!empty($where)){ |
|
1119 | + if (!empty($where)) { |
|
1120 | 1120 | $result = $this->db->exec_SELECTgetRows( |
1121 | 1121 | 'qid', |
1122 | 1122 | 'tx_crawler_queue', |
1123 | 1123 | $where. |
1124 | - ' AND NOT exec_time' . |
|
1124 | + ' AND NOT exec_time'. |
|
1125 | 1125 | ' AND NOT process_id '. |
1126 | 1126 | ' AND page_id='.intval($fieldArray['page_id']). |
1127 | - ' AND parameters_hash = ' . $this->db->fullQuoteStr($fieldArray['parameters_hash'], 'tx_crawler_queue') |
|
1127 | + ' AND parameters_hash = '.$this->db->fullQuoteStr($fieldArray['parameters_hash'], 'tx_crawler_queue') |
|
1128 | 1128 | ); |
1129 | 1129 | |
1130 | 1130 | if (is_array($result)) { |
@@ -1145,7 +1145,7 @@ discard block |
||
1145 | 1145 | * |
1146 | 1146 | * @codeCoverageIgnore |
1147 | 1147 | */ |
1148 | - public function getCurrentTime(){ |
|
1148 | + public function getCurrentTime() { |
|
1149 | 1149 | return time(); |
1150 | 1150 | } |
1151 | 1151 | |
@@ -1166,18 +1166,18 @@ discard block |
||
1166 | 1166 | public function readUrl($queueId, $force = FALSE) { |
1167 | 1167 | $ret = 0; |
1168 | 1168 | if ($this->debugMode) { |
1169 | - \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl start ' . microtime(true), __FUNCTION__); |
|
1169 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl start '.microtime(true), __FUNCTION__); |
|
1170 | 1170 | } |
1171 | 1171 | // Get entry: |
1172 | 1172 | list($queueRec) = $this->db->exec_SELECTgetRows('*', 'tx_crawler_queue', |
1173 | - 'qid=' . intval($queueId) . ($force ? '' : ' AND exec_time=0 AND process_scheduled > 0')); |
|
1173 | + 'qid='.intval($queueId).($force ? '' : ' AND exec_time=0 AND process_scheduled > 0')); |
|
1174 | 1174 | |
1175 | 1175 | if (!is_array($queueRec)) { |
1176 | 1176 | return; |
1177 | 1177 | } |
1178 | 1178 | |
1179 | - $pageUidRootTypoScript = \AOE\Crawler\Utility\TypoScriptUtility::getPageUidForTypoScriptRootTemplateInRootLine((int)$queueRec['page_id']); |
|
1180 | - $this->initTSFE((int)$pageUidRootTypoScript); |
|
1179 | + $pageUidRootTypoScript = \AOE\Crawler\Utility\TypoScriptUtility::getPageUidForTypoScriptRootTemplateInRootLine((int) $queueRec['page_id']); |
|
1180 | + $this->initTSFE((int) $pageUidRootTypoScript); |
|
1181 | 1181 | |
1182 | 1182 | \AOE\Crawler\Utility\SignalSlotUtility::emitSignal( |
1183 | 1183 | __CLASS__, |
@@ -1192,7 +1192,7 @@ discard block |
||
1192 | 1192 | //if multiprocessing is used we need to store the id of the process which has handled this entry |
1193 | 1193 | $field_array['process_id_completed'] = $this->processID; |
1194 | 1194 | } |
1195 | - $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array); |
|
1195 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
1196 | 1196 | |
1197 | 1197 | $result = $this->readUrl_exec($queueRec); |
1198 | 1198 | $resultData = unserialize($result['content']); |
@@ -1221,11 +1221,11 @@ discard block |
||
1221 | 1221 | array($queueId, &$field_array) |
1222 | 1222 | ); |
1223 | 1223 | |
1224 | - $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array); |
|
1224 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
1225 | 1225 | |
1226 | 1226 | |
1227 | 1227 | if ($this->debugMode) { |
1228 | - \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl stop ' . microtime(true), __FUNCTION__); |
|
1228 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl stop '.microtime(true), __FUNCTION__); |
|
1229 | 1229 | } |
1230 | 1230 | |
1231 | 1231 | return $ret; |
@@ -1238,7 +1238,7 @@ discard block |
||
1238 | 1238 | * |
1239 | 1239 | * @return string |
1240 | 1240 | */ |
1241 | - protected function readUrlFromArray($field_array) { |
|
1241 | + protected function readUrlFromArray($field_array) { |
|
1242 | 1242 | |
1243 | 1243 | // Set exec_time to lock record: |
1244 | 1244 | $field_array['exec_time'] = $this->getCurrentTime(); |
@@ -1256,7 +1256,7 @@ discard block |
||
1256 | 1256 | array($queueId, &$field_array) |
1257 | 1257 | ); |
1258 | 1258 | |
1259 | - $this->db->exec_UPDATEquery('tx_crawler_queue','qid='.intval($queueId), $field_array); |
|
1259 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
1260 | 1260 | |
1261 | 1261 | return $result; |
1262 | 1262 | } |
@@ -1267,17 +1267,17 @@ discard block |
||
1267 | 1267 | * @param array $queueRec Queue record |
1268 | 1268 | * @return string Result output. |
1269 | 1269 | */ |
1270 | - protected function readUrl_exec($queueRec) { |
|
1270 | + protected function readUrl_exec($queueRec) { |
|
1271 | 1271 | // Decode parameters: |
1272 | 1272 | $parameters = unserialize($queueRec['parameters']); |
1273 | 1273 | $result = 'ERROR'; |
1274 | - if (is_array($parameters)) { |
|
1275 | - if ($parameters['_CALLBACKOBJ']) { // Calling object: |
|
1274 | + if (is_array($parameters)) { |
|
1275 | + if ($parameters['_CALLBACKOBJ']) { // Calling object: |
|
1276 | 1276 | $objRef = $parameters['_CALLBACKOBJ']; |
1277 | 1277 | $callBackObj = &\TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($objRef); |
1278 | - if (is_object($callBackObj)) { |
|
1278 | + if (is_object($callBackObj)) { |
|
1279 | 1279 | unset($parameters['_CALLBACKOBJ']); |
1280 | - $result = array('content' => serialize($callBackObj->crawler_execute($parameters,$this))); |
|
1280 | + $result = array('content' => serialize($callBackObj->crawler_execute($parameters, $this))); |
|
1281 | 1281 | } else { |
1282 | 1282 | $result = array('content' => 'No object: '.$objRef); |
1283 | 1283 | } |
@@ -1287,9 +1287,9 @@ discard block |
||
1287 | 1287 | $crawlerId = $queueRec['qid'].':'.md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey']); |
1288 | 1288 | |
1289 | 1289 | // Get result: |
1290 | - $result = $this->requestUrl($parameters['url'],$crawlerId); |
|
1290 | + $result = $this->requestUrl($parameters['url'], $crawlerId); |
|
1291 | 1291 | |
1292 | - tx_crawler_domain_events_dispatcher::getInstance()->post('urlCrawled',$queueRec['set_id'],array('url' => $parameters['url'], 'result' => $result)); |
|
1292 | + tx_crawler_domain_events_dispatcher::getInstance()->post('urlCrawled', $queueRec['set_id'], array('url' => $parameters['url'], 'result' => $result)); |
|
1293 | 1293 | } |
1294 | 1294 | } |
1295 | 1295 | |
@@ -1306,7 +1306,7 @@ discard block |
||
1306 | 1306 | * @param integer $recursion Recursion limiter for 302 redirects |
1307 | 1307 | * @return array Array with content |
1308 | 1308 | */ |
1309 | - public function requestUrl($originalUrl, $crawlerId, $timeout=2, $recursion=10) { |
|
1309 | + public function requestUrl($originalUrl, $crawlerId, $timeout = 2, $recursion = 10) { |
|
1310 | 1310 | |
1311 | 1311 | if (!$recursion) return false; |
1312 | 1312 | |
@@ -1318,7 +1318,7 @@ discard block |
||
1318 | 1318 | return FALSE; |
1319 | 1319 | } |
1320 | 1320 | |
1321 | - if (!in_array($url['scheme'], array('','http','https'))) { |
|
1321 | + if (!in_array($url['scheme'], array('', 'http', 'https'))) { |
|
1322 | 1322 | if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
1323 | 1323 | return FALSE; |
1324 | 1324 | } |
@@ -1336,14 +1336,14 @@ discard block |
||
1336 | 1336 | |
1337 | 1337 | if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] && $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlProxyServer']) { |
1338 | 1338 | $rurl = parse_url($GLOBALS['TYPO3_CONF_VARS']['SYS']['curlProxyServer']); |
1339 | - $url['path'] = $url['scheme'] . '://' . $url['host'] . ($url['port'] > 0 ? ':' . $url['port'] : '') . $url['path']; |
|
1339 | + $url['path'] = $url['scheme'].'://'.$url['host'].($url['port'] > 0 ? ':'.$url['port'] : '').$url['path']; |
|
1340 | 1340 | $reqHeaders = $this->buildRequestHeaderArray($url, $crawlerId); |
1341 | 1341 | } |
1342 | 1342 | |
1343 | 1343 | $host = $rurl['host']; |
1344 | 1344 | |
1345 | 1345 | if ($url['scheme'] == 'https') { |
1346 | - $host = 'ssl://' . $host; |
|
1346 | + $host = 'ssl://'.$host; |
|
1347 | 1347 | $port = ($rurl['port'] > 0) ? $rurl['port'] : 443; |
1348 | 1348 | } else { |
1349 | 1349 | $port = ($rurl['port'] > 0) ? $rurl['port'] : 80; |
@@ -1357,24 +1357,24 @@ discard block |
||
1357 | 1357 | return FALSE; |
1358 | 1358 | } else { |
1359 | 1359 | // Request message: |
1360 | - $msg = implode("\r\n",$reqHeaders)."\r\n\r\n"; |
|
1361 | - fputs ($fp, $msg); |
|
1360 | + $msg = implode("\r\n", $reqHeaders)."\r\n\r\n"; |
|
1361 | + fputs($fp, $msg); |
|
1362 | 1362 | |
1363 | 1363 | // Read response: |
1364 | 1364 | $d = $this->getHttpResponseFromStream($fp); |
1365 | - fclose ($fp); |
|
1365 | + fclose($fp); |
|
1366 | 1366 | |
1367 | 1367 | $time = microtime(true) - $startTime; |
1368 | - $this->log($originalUrl .' '.$time); |
|
1368 | + $this->log($originalUrl.' '.$time); |
|
1369 | 1369 | |
1370 | 1370 | // Implode content and headers: |
1371 | 1371 | $result = array( |
1372 | 1372 | 'request' => $msg, |
1373 | 1373 | 'headers' => implode('', $d['headers']), |
1374 | - 'content' => implode('', (array)$d['content']) |
|
1374 | + 'content' => implode('', (array) $d['content']) |
|
1375 | 1375 | ); |
1376 | 1376 | |
1377 | - if (($this->extensionSettings['follow30x']) && ($newUrl = $this->getRequestUrlFrom302Header($d['headers'],$url['user'],$url['pass']))) { |
|
1377 | + if (($this->extensionSettings['follow30x']) && ($newUrl = $this->getRequestUrlFrom302Header($d['headers'], $url['user'], $url['pass']))) { |
|
1378 | 1378 | $result = array_merge(array('parentRequest'=>$result), $this->requestUrl($newUrl, $crawlerId, $recursion--)); |
1379 | 1379 | $newRequestUrl = $this->requestUrl($newUrl, $crawlerId, $timeout, --$recursion); |
1380 | 1380 | |
@@ -1413,8 +1413,8 @@ discard block |
||
1413 | 1413 | |
1414 | 1414 | // Base path must be '/<pathSegments>/': |
1415 | 1415 | if ($frontendBasePath != '/') { |
1416 | - $frontendBasePath = '/' . ltrim($frontendBasePath, '/'); |
|
1417 | - $frontendBasePath = rtrim($frontendBasePath, '/') . '/'; |
|
1416 | + $frontendBasePath = '/'.ltrim($frontendBasePath, '/'); |
|
1417 | + $frontendBasePath = rtrim($frontendBasePath, '/').'/'; |
|
1418 | 1418 | } |
1419 | 1419 | |
1420 | 1420 | return $frontendBasePath; |
@@ -1447,7 +1447,7 @@ discard block |
||
1447 | 1447 | |
1448 | 1448 | if (is_resource($streamPointer)) { |
1449 | 1449 | // read headers |
1450 | - while($line = fgets($streamPointer, '2048')) { |
|
1450 | + while ($line = fgets($streamPointer, '2048')) { |
|
1451 | 1451 | $line = trim($line); |
1452 | 1452 | if ($line !== '') { |
1453 | 1453 | $response['headers'][] = $line; |
@@ -1457,7 +1457,7 @@ discard block |
||
1457 | 1457 | } |
1458 | 1458 | |
1459 | 1459 | // read content |
1460 | - while($line = fgets($streamPointer, '2048')) { |
|
1460 | + while ($line = fgets($streamPointer, '2048')) { |
|
1461 | 1461 | $response['content'][] = $line; |
1462 | 1462 | } |
1463 | 1463 | } |
@@ -1470,7 +1470,7 @@ discard block |
||
1470 | 1470 | */ |
1471 | 1471 | protected function log($message) { |
1472 | 1472 | if (!empty($this->extensionSettings['logFileName'])) { |
1473 | - @file_put_contents($this->extensionSettings['logFileName'], date('Ymd His') . $message . "\n", FILE_APPEND); |
|
1473 | + @file_put_contents($this->extensionSettings['logFileName'], date('Ymd His').$message."\n", FILE_APPEND); |
|
1474 | 1474 | } |
1475 | 1475 | } |
1476 | 1476 | |
@@ -1486,12 +1486,12 @@ discard block |
||
1486 | 1486 | $reqHeaders = array(); |
1487 | 1487 | $reqHeaders[] = 'GET '.$url['path'].($url['query'] ? '?'.$url['query'] : '').' HTTP/1.0'; |
1488 | 1488 | $reqHeaders[] = 'Host: '.$url['host']; |
1489 | - if (stristr($url['query'],'ADMCMD_previewWS')) { |
|
1489 | + if (stristr($url['query'], 'ADMCMD_previewWS')) { |
|
1490 | 1490 | $reqHeaders[] = 'Cookie: $Version="1"; be_typo_user="1"; $Path=/'; |
1491 | 1491 | } |
1492 | 1492 | $reqHeaders[] = 'Connection: close'; |
1493 | - if ($url['user']!='') { |
|
1494 | - $reqHeaders[] = 'Authorization: Basic '. base64_encode($url['user'].':'.$url['pass']); |
|
1493 | + if ($url['user'] != '') { |
|
1494 | + $reqHeaders[] = 'Authorization: Basic '.base64_encode($url['user'].':'.$url['pass']); |
|
1495 | 1495 | } |
1496 | 1496 | $reqHeaders[] = 'X-T3crawler: '.$crawlerId; |
1497 | 1497 | $reqHeaders[] = 'User-Agent: TYPO3 crawler'; |
@@ -1506,21 +1506,21 @@ discard block |
||
1506 | 1506 | * @param string HTTP Auth. Password |
1507 | 1507 | * @return string URL from redirection |
1508 | 1508 | */ |
1509 | - protected function getRequestUrlFrom302Header($headers,$user='',$pass='') { |
|
1510 | - if(!is_array($headers)) return false; |
|
1511 | - if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) return false; |
|
1509 | + protected function getRequestUrlFrom302Header($headers, $user = '', $pass = '') { |
|
1510 | + if (!is_array($headers)) return false; |
|
1511 | + if (!(stristr($headers[0], '301 Moved') || stristr($headers[0], '302 Found') || stristr($headers[0], '302 Moved'))) return false; |
|
1512 | 1512 | |
1513 | - foreach($headers as $hl) { |
|
1514 | - $tmp = explode(": ",$hl); |
|
1513 | + foreach ($headers as $hl) { |
|
1514 | + $tmp = explode(": ", $hl); |
|
1515 | 1515 | $header[trim($tmp[0])] = trim($tmp[1]); |
1516 | - if(trim($tmp[0])=='Location') break; |
|
1516 | + if (trim($tmp[0]) == 'Location') break; |
|
1517 | 1517 | } |
1518 | - if(!array_key_exists('Location',$header)) return false; |
|
1518 | + if (!array_key_exists('Location', $header)) return false; |
|
1519 | 1519 | |
1520 | - if($user!='') { |
|
1521 | - if(!($tmp = parse_url($header['Location']))) return false; |
|
1522 | - $newUrl = $tmp['scheme'] . '://' . $user . ':' . $pass . '@' . $tmp['host'] . $tmp['path']; |
|
1523 | - if($tmp['query']!='') $newUrl .= '?' . $tmp['query']; |
|
1520 | + if ($user != '') { |
|
1521 | + if (!($tmp = parse_url($header['Location']))) return false; |
|
1522 | + $newUrl = $tmp['scheme'].'://'.$user.':'.$pass.'@'.$tmp['host'].$tmp['path']; |
|
1523 | + if ($tmp['query'] != '') $newUrl .= '?'.$tmp['query']; |
|
1524 | 1524 | } else { |
1525 | 1525 | $newUrl = $header['Location']; |
1526 | 1526 | } |
@@ -1541,15 +1541,15 @@ discard block |
||
1541 | 1541 | * @param object TSFE object (reference under PHP5) |
1542 | 1542 | * @return void |
1543 | 1543 | */ |
1544 | - function fe_init(&$params, $ref) { |
|
1544 | + function fe_init(&$params, $ref) { |
|
1545 | 1545 | |
1546 | 1546 | // Authenticate crawler request: |
1547 | - if (isset($_SERVER['HTTP_X_T3CRAWLER'])) { |
|
1548 | - list($queueId,$hash) = explode(':', $_SERVER['HTTP_X_T3CRAWLER']); |
|
1549 | - list($queueRec) = $this->db->exec_SELECTgetRows('*','tx_crawler_queue','qid='.intval($queueId)); |
|
1547 | + if (isset($_SERVER['HTTP_X_T3CRAWLER'])) { |
|
1548 | + list($queueId, $hash) = explode(':', $_SERVER['HTTP_X_T3CRAWLER']); |
|
1549 | + list($queueRec) = $this->db->exec_SELECTgetRows('*', 'tx_crawler_queue', 'qid='.intval($queueId)); |
|
1550 | 1550 | |
1551 | 1551 | // If a crawler record was found and hash was matching, set it up: |
1552 | - if (is_array($queueRec) && $hash === md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'])) { |
|
1552 | + if (is_array($queueRec) && $hash === md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'])) { |
|
1553 | 1553 | $params['pObj']->applicationData['tx_crawler']['running'] = TRUE; |
1554 | 1554 | $params['pObj']->applicationData['tx_crawler']['parameters'] = unserialize($queueRec['parameters']); |
1555 | 1555 | $params['pObj']->applicationData['tx_crawler']['log'] = array(); |
@@ -1607,7 +1607,7 @@ discard block |
||
1607 | 1607 | /* @var $tree \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
1608 | 1608 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
1609 | 1609 | $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1); |
1610 | - $tree->init('AND ' . $perms_clause); |
|
1610 | + $tree->init('AND '.$perms_clause); |
|
1611 | 1611 | |
1612 | 1612 | $pageinfo = \TYPO3\CMS\Backend\Utility\BackendUtility::readPageAccess($id, $perms_clause); |
1613 | 1613 | |
@@ -1618,7 +1618,7 @@ discard block |
||
1618 | 1618 | ); |
1619 | 1619 | |
1620 | 1620 | // Get branch beneath: |
1621 | - if ($depth) { |
|
1621 | + if ($depth) { |
|
1622 | 1622 | $tree->getTree($id, $depth, ''); |
1623 | 1623 | } |
1624 | 1624 | |
@@ -1630,7 +1630,7 @@ discard block |
||
1630 | 1630 | $this->MP = false; |
1631 | 1631 | |
1632 | 1632 | // recognize mount points |
1633 | - if($data['row']['doktype'] == 7){ |
|
1633 | + if ($data['row']['doktype'] == 7) { |
|
1634 | 1634 | $mountpage = $this->db->exec_SELECTgetRows('*', 'pages', 'uid = '.$data['row']['uid']); |
1635 | 1635 | |
1636 | 1636 | // fetch mounted pages |
@@ -1640,15 +1640,15 @@ discard block |
||
1640 | 1640 | $mountTree->init('AND '.$perms_clause); |
1641 | 1641 | $mountTree->getTree($mountpage[0]['mount_pid'], $depth, ''); |
1642 | 1642 | |
1643 | - foreach($mountTree->tree as $mountData) { |
|
1643 | + foreach ($mountTree->tree as $mountData) { |
|
1644 | 1644 | $code .= $this->drawURLs_addRowsForPage( |
1645 | 1645 | $mountData['row'], |
1646 | - $mountData['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages',$mountData['row'],TRUE) |
|
1646 | + $mountData['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $mountData['row'], TRUE) |
|
1647 | 1647 | ); |
1648 | 1648 | } |
1649 | 1649 | |
1650 | 1650 | // replace page when mount_pid_ol is enabled |
1651 | - if($mountpage[0]['mount_pid_ol']){ |
|
1651 | + if ($mountpage[0]['mount_pid_ol']) { |
|
1652 | 1652 | $data['row']['uid'] = $mountpage[0]['mount_pid']; |
1653 | 1653 | } else { |
1654 | 1654 | // if the mount_pid_ol is not set the MP must not be used for the mountpoint page |
@@ -1658,7 +1658,7 @@ discard block |
||
1658 | 1658 | |
1659 | 1659 | $code .= $this->drawURLs_addRowsForPage( |
1660 | 1660 | $data['row'], |
1661 | - $data['HTML'] . \TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $data['row'], TRUE) |
|
1661 | + $data['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $data['row'], TRUE) |
|
1662 | 1662 | ); |
1663 | 1663 | } |
1664 | 1664 | |
@@ -1682,7 +1682,7 @@ discard block |
||
1682 | 1682 | if (!empty($excludeString)) { |
1683 | 1683 | /* @var $tree \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
1684 | 1684 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
1685 | - $tree->init('AND ' . $this->backendUser->getPagePermsClause(1)); |
|
1685 | + $tree->init('AND '.$this->backendUser->getPagePermsClause(1)); |
|
1686 | 1686 | |
1687 | 1687 | $excludeParts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $excludeString); |
1688 | 1688 | |
@@ -1691,7 +1691,7 @@ discard block |
||
1691 | 1691 | |
1692 | 1692 | // default is "page only" = "depth=0" |
1693 | 1693 | if (empty($depth)) { |
1694 | - $depth = ( stristr($excludePart,'+')) ? 99 : 0; |
|
1694 | + $depth = (stristr($excludePart, '+')) ? 99 : 0; |
|
1695 | 1695 | } |
1696 | 1696 | |
1697 | 1697 | $pidList[] = $pid; |
@@ -1724,7 +1724,7 @@ discard block |
||
1724 | 1724 | * @param string Page icon and title for row |
1725 | 1725 | * @return string HTML <tr> content (one or more) |
1726 | 1726 | */ |
1727 | - public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
|
1727 | + public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
|
1728 | 1728 | |
1729 | 1729 | $skipMessage = ''; |
1730 | 1730 | |
@@ -1745,7 +1745,7 @@ discard block |
||
1745 | 1745 | $cc = 0; |
1746 | 1746 | $content = ''; |
1747 | 1747 | if (count($configurations)) { |
1748 | - foreach($configurations as $confKey => $confArray) { |
|
1748 | + foreach ($configurations as $confKey => $confArray) { |
|
1749 | 1749 | |
1750 | 1750 | // Title column: |
1751 | 1751 | if (!$c) { |
@@ -1774,47 +1774,47 @@ discard block |
||
1774 | 1774 | $paramExpanded = ''; |
1775 | 1775 | $calcAccu = array(); |
1776 | 1776 | $calcRes = 1; |
1777 | - foreach($confArray['paramExpanded'] as $gVar => $gVal) { |
|
1778 | - $paramExpanded.= ' |
|
1777 | + foreach ($confArray['paramExpanded'] as $gVar => $gVal) { |
|
1778 | + $paramExpanded .= ' |
|
1779 | 1779 | <tr> |
1780 | 1780 | <td class="bgColor4-20">'.htmlspecialchars('&'.$gVar.'=').'<br/>'. |
1781 | 1781 | '('.count($gVal).')'. |
1782 | 1782 | '</td> |
1783 | - <td class="bgColor4" nowrap="nowrap">'.nl2br(htmlspecialchars(implode(chr(10),$gVal))).'</td> |
|
1783 | + <td class="bgColor4" nowrap="nowrap">'.nl2br(htmlspecialchars(implode(chr(10), $gVal))).'</td> |
|
1784 | 1784 | </tr> |
1785 | 1785 | '; |
1786 | - $calcRes*= count($gVal); |
|
1786 | + $calcRes *= count($gVal); |
|
1787 | 1787 | $calcAccu[] = count($gVal); |
1788 | 1788 | } |
1789 | 1789 | $paramExpanded = '<table class="lrPadding c-list param-expanded">'.$paramExpanded.'</table>'; |
1790 | - $paramExpanded.= 'Comb: '.implode('*',$calcAccu).'='.$calcRes; |
|
1790 | + $paramExpanded .= 'Comb: '.implode('*', $calcAccu).'='.$calcRes; |
|
1791 | 1791 | |
1792 | 1792 | // Options |
1793 | 1793 | $optionValues = ''; |
1794 | - if ($confArray['subCfg']['userGroups']) { |
|
1795 | - $optionValues.='User Groups: '.$confArray['subCfg']['userGroups'].'<br/>'; |
|
1794 | + if ($confArray['subCfg']['userGroups']) { |
|
1795 | + $optionValues .= 'User Groups: '.$confArray['subCfg']['userGroups'].'<br/>'; |
|
1796 | 1796 | } |
1797 | - if ($confArray['subCfg']['baseUrl']) { |
|
1798 | - $optionValues.='Base Url: '.$confArray['subCfg']['baseUrl'].'<br/>'; |
|
1797 | + if ($confArray['subCfg']['baseUrl']) { |
|
1798 | + $optionValues .= 'Base Url: '.$confArray['subCfg']['baseUrl'].'<br/>'; |
|
1799 | 1799 | } |
1800 | - if ($confArray['subCfg']['procInstrFilter']) { |
|
1801 | - $optionValues.='ProcInstr: '.$confArray['subCfg']['procInstrFilter'].'<br/>'; |
|
1800 | + if ($confArray['subCfg']['procInstrFilter']) { |
|
1801 | + $optionValues .= 'ProcInstr: '.$confArray['subCfg']['procInstrFilter'].'<br/>'; |
|
1802 | 1802 | } |
1803 | 1803 | |
1804 | 1804 | // Compile row: |
1805 | 1805 | $content .= ' |
1806 | - <tr class="bgColor' . ($c%2 ? '-20':'-10') . '"> |
|
1807 | - ' . $titleClm . ' |
|
1808 | - <td>' . htmlspecialchars($confKey) . '</td> |
|
1809 | - <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10) . '&', \TYPO3\CMS\Core\Utility\GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))) . '</td> |
|
1806 | + <tr class="bgColor' . ($c % 2 ? '-20' : '-10').'"> |
|
1807 | + ' . $titleClm.' |
|
1808 | + <td>' . htmlspecialchars($confKey).'</td> |
|
1809 | + <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10).'&', \TYPO3\CMS\Core\Utility\GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))).'</td> |
|
1810 | 1810 | <td>'.$paramExpanded.'</td> |
1811 | - <td nowrap="nowrap">' . $urlList . '</td> |
|
1812 | - <td nowrap="nowrap">' . $optionValues . '</td> |
|
1813 | - <td nowrap="nowrap">' . \TYPO3\CMS\Core\Utility\DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']) . '</td> |
|
1811 | + <td nowrap="nowrap">' . $urlList.'</td> |
|
1812 | + <td nowrap="nowrap">' . $optionValues.'</td> |
|
1813 | + <td nowrap="nowrap">' . \TYPO3\CMS\Core\Utility\DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']).'</td> |
|
1814 | 1814 | </tr>'; |
1815 | 1815 | } else { |
1816 | 1816 | |
1817 | - $content .= '<tr class="bgColor'.($c%2 ? '-20':'-10') . '"> |
|
1817 | + $content .= '<tr class="bgColor'.($c % 2 ? '-20' : '-10').'"> |
|
1818 | 1818 | '.$titleClm.' |
1819 | 1819 | <td>'.htmlspecialchars($confKey).'</td> |
1820 | 1820 | <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td> |
@@ -1829,7 +1829,7 @@ discard block |
||
1829 | 1829 | $message = !empty($skipMessage) ? ' ('.$skipMessage.')' : ''; |
1830 | 1830 | |
1831 | 1831 | // Compile row: |
1832 | - $content.= ' |
|
1832 | + $content .= ' |
|
1833 | 1833 | <tr class="bgColor-20" style="border-bottom: 1px solid black;"> |
1834 | 1834 | <td>'.$pageTitleAndIcon.'</td> |
1835 | 1835 | <td colspan="6"><em>No entries</em>'.$message.'</td> |
@@ -1890,7 +1890,7 @@ discard block |
||
1890 | 1890 | // Run process: |
1891 | 1891 | $result = $this->CLI_run($countInARun, $sleepTime, $sleepAfterFinish); |
1892 | 1892 | } catch (Exception $e) { |
1893 | - $this->CLI_debug(get_class($e) . ': ' . $e->getMessage()); |
|
1893 | + $this->CLI_debug(get_class($e).': '.$e->getMessage()); |
|
1894 | 1894 | $result = self::CLI_STATUS_ABORTED; |
1895 | 1895 | } |
1896 | 1896 | |
@@ -1901,7 +1901,7 @@ discard block |
||
1901 | 1901 | $releaseStatus = $this->CLI_releaseProcesses($this->CLI_buildProcessId()); |
1902 | 1902 | |
1903 | 1903 | $this->CLI_debug("Unprocessed Items remaining:".$this->getUnprocessedItemsCount()." (".$this->CLI_buildProcessId().")"); |
1904 | - $result |= ( $this->getUnprocessedItemsCount() > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED ); |
|
1904 | + $result |= ($this->getUnprocessedItemsCount() > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED); |
|
1905 | 1905 | } else { |
1906 | 1906 | $result |= self::CLI_STATUS_ABORTED; |
1907 | 1907 | } |
@@ -1914,7 +1914,7 @@ discard block |
||
1914 | 1914 | * |
1915 | 1915 | * @return void |
1916 | 1916 | */ |
1917 | - function CLI_main_im() { |
|
1917 | + function CLI_main_im() { |
|
1918 | 1918 | $this->setAccessMode('cli_im'); |
1919 | 1919 | |
1920 | 1920 | $cliObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_cli_im'); |
@@ -1924,7 +1924,7 @@ discard block |
||
1924 | 1924 | $this->backendUser->setWorkspace(0); |
1925 | 1925 | |
1926 | 1926 | // Print help |
1927 | - if (!isset($cliObj->cli_args['_DEFAULT'][1])) { |
|
1927 | + if (!isset($cliObj->cli_args['_DEFAULT'][1])) { |
|
1928 | 1928 | $cliObj->cli_validateArgs(); |
1929 | 1929 | $cliObj->cli_help(); |
1930 | 1930 | exit; |
@@ -1932,8 +1932,8 @@ discard block |
||
1932 | 1932 | |
1933 | 1933 | $cliObj->cli_validateArgs(); |
1934 | 1934 | |
1935 | - if ($cliObj->cli_argValue('-o')==='exec') { |
|
1936 | - $this->registerQueueEntriesInternallyOnly=TRUE; |
|
1935 | + if ($cliObj->cli_argValue('-o') === 'exec') { |
|
1936 | + $this->registerQueueEntriesInternallyOnly = TRUE; |
|
1937 | 1937 | } |
1938 | 1938 | |
1939 | 1939 | if (isset($cliObj->cli_args['_DEFAULT'][2])) { |
@@ -1946,16 +1946,16 @@ discard block |
||
1946 | 1946 | |
1947 | 1947 | $configurationKeys = $this->getConfigurationKeys($cliObj); |
1948 | 1948 | |
1949 | - if(!is_array($configurationKeys)){ |
|
1949 | + if (!is_array($configurationKeys)) { |
|
1950 | 1950 | $configurations = $this->getUrlsForPageId($pageId); |
1951 | - if(is_array($configurations)){ |
|
1951 | + if (is_array($configurations)) { |
|
1952 | 1952 | $configurationKeys = array_keys($configurations); |
1953 | - }else{ |
|
1953 | + } else { |
|
1954 | 1954 | $configurationKeys = array(); |
1955 | 1955 | } |
1956 | 1956 | } |
1957 | 1957 | |
1958 | - if($cliObj->cli_argValue('-o')==='queue' || $cliObj->cli_argValue('-o')==='exec'){ |
|
1958 | + if ($cliObj->cli_argValue('-o') === 'queue' || $cliObj->cli_argValue('-o') === 'exec') { |
|
1959 | 1959 | |
1960 | 1960 | $reason = new tx_crawler_domain_reason(); |
1961 | 1961 | $reason->setReason(tx_crawler_domain_reason::REASON_GUI_SUBMIT); |
@@ -1963,7 +1963,7 @@ discard block |
||
1963 | 1963 | tx_crawler_domain_events_dispatcher::getInstance()->post( |
1964 | 1964 | 'invokeQueueChange', |
1965 | 1965 | $this->setID, |
1966 | - array( 'reason' => $reason ) |
|
1966 | + array('reason' => $reason) |
|
1967 | 1967 | ); |
1968 | 1968 | } |
1969 | 1969 | |
@@ -1974,42 +1974,42 @@ discard block |
||
1974 | 1974 | $this->setID = \TYPO3\CMS\Core\Utility\GeneralUtility::md5int(microtime()); |
1975 | 1975 | $this->getPageTreeAndUrls( |
1976 | 1976 | $pageId, |
1977 | - \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_argValue('-d'),0,99), |
|
1977 | + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_argValue('-d'), 0, 99), |
|
1978 | 1978 | $this->getCurrentTime(), |
1979 | - \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30,1,1000), |
|
1980 | - $cliObj->cli_argValue('-o')==='queue' || $cliObj->cli_argValue('-o')==='exec', |
|
1981 | - $cliObj->cli_argValue('-o')==='url', |
|
1982 | - \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$cliObj->cli_argValue('-proc'),1), |
|
1979 | + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30, 1, 1000), |
|
1980 | + $cliObj->cli_argValue('-o') === 'queue' || $cliObj->cli_argValue('-o') === 'exec', |
|
1981 | + $cliObj->cli_argValue('-o') === 'url', |
|
1982 | + \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $cliObj->cli_argValue('-proc'), 1), |
|
1983 | 1983 | $configurationKeys |
1984 | 1984 | ); |
1985 | 1985 | |
1986 | - if ($cliObj->cli_argValue('-o')==='url') { |
|
1987 | - $cliObj->cli_echo(implode(chr(10),$this->downloadUrls).chr(10),1); |
|
1988 | - } elseif ($cliObj->cli_argValue('-o')==='exec') { |
|
1986 | + if ($cliObj->cli_argValue('-o') === 'url') { |
|
1987 | + $cliObj->cli_echo(implode(chr(10), $this->downloadUrls).chr(10), 1); |
|
1988 | + } elseif ($cliObj->cli_argValue('-o') === 'exec') { |
|
1989 | 1989 | $cliObj->cli_echo("Executing ".count($this->urlList)." requests right away:\n\n"); |
1990 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10)); |
|
1990 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10)); |
|
1991 | 1991 | $cliObj->cli_echo("\nProcessing:\n"); |
1992 | 1992 | |
1993 | - foreach($this->queueEntries as $queueRec) { |
|
1993 | + foreach ($this->queueEntries as $queueRec) { |
|
1994 | 1994 | $p = unserialize($queueRec['parameters']); |
1995 | - $cliObj->cli_echo($p['url'].' ('.implode(',',$p['procInstructions']).') => '); |
|
1995 | + $cliObj->cli_echo($p['url'].' ('.implode(',', $p['procInstructions']).') => '); |
|
1996 | 1996 | |
1997 | 1997 | $result = $this->readUrlFromArray($queueRec); |
1998 | 1998 | |
1999 | 1999 | $requestResult = unserialize($result['content']); |
2000 | - if (is_array($requestResult)) { |
|
2001 | - $resLog = is_array($requestResult['log']) ? chr(10).chr(9).chr(9).implode(chr(10).chr(9).chr(9),$requestResult['log']) : ''; |
|
2000 | + if (is_array($requestResult)) { |
|
2001 | + $resLog = is_array($requestResult['log']) ? chr(10).chr(9).chr(9).implode(chr(10).chr(9).chr(9), $requestResult['log']) : ''; |
|
2002 | 2002 | $cliObj->cli_echo('OK: '.$resLog.chr(10)); |
2003 | 2003 | } else { |
2004 | - $cliObj->cli_echo('Error checking Crawler Result: '.substr(preg_replace('/\s+/',' ',strip_tags($result['content'])),0,30000).'...'.chr(10)); |
|
2004 | + $cliObj->cli_echo('Error checking Crawler Result: '.substr(preg_replace('/\s+/', ' ', strip_tags($result['content'])), 0, 30000).'...'.chr(10)); |
|
2005 | 2005 | } |
2006 | 2006 | } |
2007 | - } elseif ($cliObj->cli_argValue('-o')==='queue') { |
|
2007 | + } elseif ($cliObj->cli_argValue('-o') === 'queue') { |
|
2008 | 2008 | $cliObj->cli_echo("Putting ".count($this->urlList)." entries in queue:\n\n"); |
2009 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10)); |
|
2009 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10)); |
|
2010 | 2010 | } else { |
2011 | - $cliObj->cli_echo(count($this->urlList)." entries found for processing. (Use -o to decide action):\n\n",1); |
|
2012 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10),1); |
|
2011 | + $cliObj->cli_echo(count($this->urlList)." entries found for processing. (Use -o to decide action):\n\n", 1); |
|
2012 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10), 1); |
|
2013 | 2013 | } |
2014 | 2014 | } |
2015 | 2015 | |
@@ -2034,12 +2034,12 @@ discard block |
||
2034 | 2034 | } |
2035 | 2035 | |
2036 | 2036 | $cliObj->cli_validateArgs(); |
2037 | - $pageId = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1],0); |
|
2037 | + $pageId = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1], 0); |
|
2038 | 2038 | $fullFlush = ($pageId == 0); |
2039 | 2039 | |
2040 | 2040 | $mode = $cliObj->cli_argValue('-o'); |
2041 | 2041 | |
2042 | - switch($mode) { |
|
2042 | + switch ($mode) { |
|
2043 | 2043 | case 'all': |
2044 | 2044 | $result = $this->getLogEntriesForPageId($pageId, '', true, $fullFlush); |
2045 | 2045 | break; |
@@ -2062,7 +2062,7 @@ discard block |
||
2062 | 2062 | * @param tx_crawler_cli_im $cliObj Command line object |
2063 | 2063 | * @return array Array of keys, or an empty array if no -conf value was given |
2064 | 2064 | */ |
2065 | - protected function getConfigurationKeys(tx_crawler_cli_im &$cliObj) { |
|
2065 | + protected function getConfigurationKeys(tx_crawler_cli_im & $cliObj) { |
|
2066 | 2066 | $parameter = trim($cliObj->cli_argValue('-conf')); |
2067 | 2067 | return ($parameter != '' ? \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $parameter) : array()); |
2068 | 2068 | } |
@@ -2087,7 +2087,7 @@ discard block |
||
2087 | 2087 | $purgeDate = $this->getCurrentTime() - 24 * 60 * 60 * intval($this->extensionSettings['purgeQueueDays']); |
2088 | 2088 | $del = $this->db->exec_DELETEquery( |
2089 | 2089 | 'tx_crawler_queue', |
2090 | - 'exec_time!=0 AND exec_time<' . $purgeDate |
|
2090 | + 'exec_time!=0 AND exec_time<'.$purgeDate |
|
2091 | 2091 | ); |
2092 | 2092 | } |
2093 | 2093 | |
@@ -2104,10 +2104,10 @@ discard block |
||
2104 | 2104 | intval($countInARun) |
2105 | 2105 | ); |
2106 | 2106 | |
2107 | - if (count($rows)>0) { |
|
2107 | + if (count($rows) > 0) { |
|
2108 | 2108 | $quidList = array(); |
2109 | 2109 | |
2110 | - foreach($rows as $r) { |
|
2110 | + foreach ($rows as $r) { |
|
2111 | 2111 | $quidList[] = $r['qid']; |
2112 | 2112 | } |
2113 | 2113 | |
@@ -2118,7 +2118,7 @@ discard block |
||
2118 | 2118 | //TODO make sure we're not taking assigned queue-entries |
2119 | 2119 | $this->db->exec_UPDATEquery( |
2120 | 2120 | 'tx_crawler_queue', |
2121 | - 'qid IN ('.implode(',',$quidList).')', |
|
2121 | + 'qid IN ('.implode(',', $quidList).')', |
|
2122 | 2122 | array( |
2123 | 2123 | 'process_scheduled' => intval($this->getCurrentTime()), |
2124 | 2124 | 'process_id' => $processId |
@@ -2129,32 +2129,32 @@ discard block |
||
2129 | 2129 | $numberOfAffectedRows = $this->db->sql_affected_rows(); |
2130 | 2130 | $this->db->exec_UPDATEquery( |
2131 | 2131 | 'tx_crawler_process', |
2132 | - "process_id = '".$processId."'" , |
|
2132 | + "process_id = '".$processId."'", |
|
2133 | 2133 | array( |
2134 | 2134 | 'assigned_items_count' => intval($numberOfAffectedRows) |
2135 | 2135 | ) |
2136 | 2136 | ); |
2137 | 2137 | |
2138 | - if($numberOfAffectedRows == count($quidList)) { |
|
2138 | + if ($numberOfAffectedRows == count($quidList)) { |
|
2139 | 2139 | $this->db->sql_query('COMMIT'); |
2140 | - } else { |
|
2140 | + } else { |
|
2141 | 2141 | $this->db->sql_query('ROLLBACK'); |
2142 | 2142 | $this->CLI_debug("Nothing processed due to multi-process collision (".$this->CLI_buildProcessId().")"); |
2143 | - return ( $result | self::CLI_STATUS_ABORTED ); |
|
2143 | + return ($result | self::CLI_STATUS_ABORTED); |
|
2144 | 2144 | } |
2145 | 2145 | |
2146 | 2146 | |
2147 | 2147 | |
2148 | - foreach($rows as $r) { |
|
2148 | + foreach ($rows as $r) { |
|
2149 | 2149 | $result |= $this->readUrl($r['qid']); |
2150 | 2150 | |
2151 | 2151 | $counter++; |
2152 | - usleep(intval($sleepTime)); // Just to relax the system |
|
2152 | + usleep(intval($sleepTime)); // Just to relax the system |
|
2153 | 2153 | |
2154 | 2154 | // if the cli has been disabled between the start and the current url read, we need to return from the function |
2155 | 2155 | // and NOT mark the process as ended. |
2156 | 2156 | if ($this->getDisabled()) { |
2157 | - return ( $result | self::CLI_STATUS_ABORTED ); |
|
2157 | + return ($result | self::CLI_STATUS_ABORTED); |
|
2158 | 2158 | } |
2159 | 2159 | |
2160 | 2160 | if (!$this->CLI_checkIfProcessIsActive($this->CLI_buildProcessId())) { |
@@ -2162,7 +2162,7 @@ discard block |
||
2162 | 2162 | |
2163 | 2163 | //TODO might need an additional returncode |
2164 | 2164 | $result |= self::CLI_STATUS_ABORTED; |
2165 | - break; //possible timeout |
|
2165 | + break; //possible timeout |
|
2166 | 2166 | } |
2167 | 2167 | } |
2168 | 2168 | |
@@ -2175,7 +2175,7 @@ discard block |
||
2175 | 2175 | $this->CLI_debug("Nothing within queue which needs to be processed (".$this->CLI_buildProcessId().")"); |
2176 | 2176 | } |
2177 | 2177 | |
2178 | - if($counter > 0) { |
|
2178 | + if ($counter > 0) { |
|
2179 | 2179 | $result |= self::CLI_STATUS_PROCESSED; |
2180 | 2180 | } |
2181 | 2181 | |
@@ -2187,12 +2187,12 @@ discard block |
||
2187 | 2187 | * |
2188 | 2188 | * @return void |
2189 | 2189 | */ |
2190 | - function CLI_runHooks() { |
|
2190 | + function CLI_runHooks() { |
|
2191 | 2191 | global $TYPO3_CONF_VARS; |
2192 | - if (is_array($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'])) { |
|
2193 | - foreach($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'] as $objRef) { |
|
2192 | + if (is_array($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'])) { |
|
2193 | + foreach ($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'] as $objRef) { |
|
2194 | 2194 | $hookObj = &\TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($objRef); |
2195 | - if (is_object($hookObj)) { |
|
2195 | + if (is_object($hookObj)) { |
|
2196 | 2196 | $hookObj->crawler_init($this); |
2197 | 2197 | } |
2198 | 2198 | } |
@@ -2229,7 +2229,7 @@ discard block |
||
2229 | 2229 | |
2230 | 2230 | $currentTime = $this->getCurrentTime(); |
2231 | 2231 | |
2232 | - while($row = $this->db->sql_fetch_assoc($res)) { |
|
2232 | + while ($row = $this->db->sql_fetch_assoc($res)) { |
|
2233 | 2233 | if ($row['ttl'] < $currentTime) { |
2234 | 2234 | $orphanProcesses[] = $row['process_id']; |
2235 | 2235 | } else { |
@@ -2239,7 +2239,7 @@ discard block |
||
2239 | 2239 | |
2240 | 2240 | // if there are less than allowed active processes then add a new one |
2241 | 2241 | if ($processCount < intval($this->extensionSettings['processLimit'])) { |
2242 | - $this->CLI_debug("add process ".$this->CLI_buildProcessId()." (".($processCount+1)."/".intval($this->extensionSettings['processLimit']).")"); |
|
2242 | + $this->CLI_debug("add process ".$this->CLI_buildProcessId()." (".($processCount + 1)."/".intval($this->extensionSettings['processLimit']).")"); |
|
2243 | 2243 | |
2244 | 2244 | // create new process record |
2245 | 2245 | $this->db->exec_INSERTquery( |
@@ -2272,17 +2272,17 @@ discard block |
||
2272 | 2272 | * @param boolean $withinLock show whether the DB-actions are included within an existing lock |
2273 | 2273 | * @return boolean |
2274 | 2274 | */ |
2275 | - function CLI_releaseProcesses($releaseIds, $withinLock=false) { |
|
2275 | + function CLI_releaseProcesses($releaseIds, $withinLock = false) { |
|
2276 | 2276 | |
2277 | 2277 | if (!is_array($releaseIds)) { |
2278 | 2278 | $releaseIds = array($releaseIds); |
2279 | 2279 | } |
2280 | 2280 | |
2281 | 2281 | if (!count($releaseIds) > 0) { |
2282 | - return false; //nothing to release |
|
2282 | + return false; //nothing to release |
|
2283 | 2283 | } |
2284 | 2284 | |
2285 | - if(!$withinLock) $this->db->sql_query('BEGIN'); |
|
2285 | + if (!$withinLock) $this->db->sql_query('BEGIN'); |
|
2286 | 2286 | |
2287 | 2287 | // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup |
2288 | 2288 | // this ensures that a single process can't mess up the entire process table |
@@ -2312,21 +2312,21 @@ discard block |
||
2312 | 2312 | // mark all requested processes as non-active |
2313 | 2313 | $this->db->exec_UPDATEquery( |
2314 | 2314 | 'tx_crawler_process', |
2315 | - 'process_id IN (\''.implode('\',\'',$releaseIds).'\') AND deleted=0', |
|
2315 | + 'process_id IN (\''.implode('\',\'', $releaseIds).'\') AND deleted=0', |
|
2316 | 2316 | array( |
2317 | 2317 | 'active'=>'0' |
2318 | 2318 | ) |
2319 | 2319 | ); |
2320 | 2320 | $this->db->exec_UPDATEquery( |
2321 | 2321 | 'tx_crawler_queue', |
2322 | - 'exec_time=0 AND process_id IN ("'.implode('","',$releaseIds).'")', |
|
2322 | + 'exec_time=0 AND process_id IN ("'.implode('","', $releaseIds).'")', |
|
2323 | 2323 | array( |
2324 | 2324 | 'process_scheduled'=>0, |
2325 | 2325 | 'process_id'=>'' |
2326 | 2326 | ) |
2327 | 2327 | ); |
2328 | 2328 | |
2329 | - if(!$withinLock) $this->db->sql_query('COMMIT'); |
|
2329 | + if (!$withinLock) $this->db->sql_query('COMMIT'); |
|
2330 | 2330 | |
2331 | 2331 | return true; |
2332 | 2332 | } |
@@ -2354,13 +2354,13 @@ discard block |
||
2354 | 2354 | $this->db->sql_query('BEGIN'); |
2355 | 2355 | $res = $this->db->exec_SELECTquery( |
2356 | 2356 | 'process_id,active,ttl', |
2357 | - 'tx_crawler_process','process_id = \''.$pid.'\' AND deleted=0', |
|
2357 | + 'tx_crawler_process', 'process_id = \''.$pid.'\' AND deleted=0', |
|
2358 | 2358 | '', |
2359 | 2359 | 'ttl', |
2360 | 2360 | '0,1' |
2361 | 2361 | ); |
2362 | - if($row = $this->db->sql_fetch_assoc($res)) { |
|
2363 | - $ret = intVal($row['active'])==1; |
|
2362 | + if ($row = $this->db->sql_fetch_assoc($res)) { |
|
2363 | + $ret = intVal($row['active']) == 1; |
|
2364 | 2364 | } |
2365 | 2365 | $this->db->sql_query('COMMIT'); |
2366 | 2366 | |
@@ -2373,8 +2373,8 @@ discard block |
||
2373 | 2373 | * @return string the ID |
2374 | 2374 | */ |
2375 | 2375 | protected function CLI_buildProcessId() { |
2376 | - if(!$this->processID) { |
|
2377 | - $this->processID= \TYPO3\CMS\Core\Utility\GeneralUtility::shortMD5($this->microtime(true)); |
|
2376 | + if (!$this->processID) { |
|
2377 | + $this->processID = \TYPO3\CMS\Core\Utility\GeneralUtility::shortMD5($this->microtime(true)); |
|
2378 | 2378 | } |
2379 | 2379 | return $this->processID; |
2380 | 2380 | } |
@@ -2386,7 +2386,7 @@ discard block |
||
2386 | 2386 | * |
2387 | 2387 | * @codeCoverageIgnore |
2388 | 2388 | */ |
2389 | - protected function microtime($get_as_float = false ) |
|
2389 | + protected function microtime($get_as_float = false) |
|
2390 | 2390 | { |
2391 | 2391 | return microtime($get_as_float); |
2392 | 2392 | } |
@@ -2399,7 +2399,7 @@ discard block |
||
2399 | 2399 | * @codeCoverageIgnore |
2400 | 2400 | */ |
2401 | 2401 | function CLI_debug($msg) { |
2402 | - if(intval($this->extensionSettings['processDebug'])) { |
|
2402 | + if (intval($this->extensionSettings['processDebug'])) { |
|
2403 | 2403 | echo $msg."\n"; flush(); |
2404 | 2404 | } |
2405 | 2405 | } |
@@ -2418,7 +2418,7 @@ discard block |
||
2418 | 2418 | |
2419 | 2419 | $cmd = escapeshellcmd($this->extensionSettings['phpPath']); |
2420 | 2420 | $cmd .= ' '; |
2421 | - $cmd .= escapeshellarg(\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler') . 'cli/bootstrap.php'); |
|
2421 | + $cmd .= escapeshellarg(\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler').'cli/bootstrap.php'); |
|
2422 | 2422 | $cmd .= ' '; |
2423 | 2423 | $cmd .= escapeshellarg($this->getFrontendBasePath()); |
2424 | 2424 | $cmd .= ' '; |
@@ -2428,10 +2428,10 @@ discard block |
||
2428 | 2428 | |
2429 | 2429 | $startTime = microtime(true); |
2430 | 2430 | $content = $this->executeShellCommand($cmd); |
2431 | - $this->log($url . (microtime(true) - $startTime)); |
|
2431 | + $this->log($url.(microtime(true) - $startTime)); |
|
2432 | 2432 | |
2433 | 2433 | $result = array( |
2434 | - 'request' => implode("\r\n", $requestHeaders) . "\r\n\r\n", |
|
2434 | + 'request' => implode("\r\n", $requestHeaders)."\r\n\r\n", |
|
2435 | 2435 | 'headers' => '', |
2436 | 2436 | 'content' => $content |
2437 | 2437 | ); |
@@ -2451,7 +2451,7 @@ discard block |
||
2451 | 2451 | $scheduledAgeInSeconds = $this->extensionSettings['cleanUpScheduledAge'] * 86400; |
2452 | 2452 | |
2453 | 2453 | $now = time(); |
2454 | - $condition = '(exec_time<>0 AND exec_time<' . ($now - $processedAgeInSeconds) . ') OR scheduled<=' . ($now - $scheduledAgeInSeconds); |
|
2454 | + $condition = '(exec_time<>0 AND exec_time<'.($now - $processedAgeInSeconds).') OR scheduled<='.($now - $scheduledAgeInSeconds); |
|
2455 | 2455 | $this->flushQueue($condition); |
2456 | 2456 | } |
2457 | 2457 | |
@@ -2472,7 +2472,7 @@ discard block |
||
2472 | 2472 | $GLOBALS['TT']->start(); |
2473 | 2473 | } |
2474 | 2474 | |
2475 | - $GLOBALS['TSFE'] = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Controller\\TypoScriptFrontendController', $GLOBALS['TYPO3_CONF_VARS'], $id, $typeNum); |
|
2475 | + $GLOBALS['TSFE'] = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Controller\\TypoScriptFrontendController', $GLOBALS['TYPO3_CONF_VARS'], $id, $typeNum); |
|
2476 | 2476 | $GLOBALS['TSFE']->sys_page = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Page\\PageRepository'); |
2477 | 2477 | $GLOBALS['TSFE']->sys_page->init(TRUE); |
2478 | 2478 | $GLOBALS['TSFE']->connectToDB(); |
@@ -2485,6 +2485,6 @@ discard block |
||
2485 | 2485 | } |
2486 | 2486 | } |
2487 | 2487 | |
2488 | -if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']) { |
|
2488 | +if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']) { |
|
2489 | 2489 | include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']); |
2490 | 2490 | } |
@@ -24,43 +24,43 @@ |
||
24 | 24 | |
25 | 25 | abstract class tx_crawler_domain_lib_abstract_dbobject { |
26 | 26 | |
27 | - /** |
|
28 | - * @var array |
|
29 | - */ |
|
30 | - protected $row; |
|
27 | + /** |
|
28 | + * @var array |
|
29 | + */ |
|
30 | + protected $row; |
|
31 | 31 | |
32 | - /** |
|
33 | - * @var string table name |
|
34 | - */ |
|
35 | - protected static $tableName; |
|
32 | + /** |
|
33 | + * @var string table name |
|
34 | + */ |
|
35 | + protected static $tableName; |
|
36 | 36 | |
37 | - /** |
|
38 | - * Constructor |
|
39 | - * |
|
40 | - * @param array $row optional array with propertys |
|
41 | - */ |
|
42 | - public function __construct($row = array()) { |
|
43 | - $this->row = $row; |
|
44 | - } |
|
37 | + /** |
|
38 | + * Constructor |
|
39 | + * |
|
40 | + * @param array $row optional array with propertys |
|
41 | + */ |
|
42 | + public function __construct($row = array()) { |
|
43 | + $this->row = $row; |
|
44 | + } |
|
45 | 45 | |
46 | - /** |
|
47 | - * Get table name |
|
48 | - * |
|
49 | - * @param void |
|
50 | - * @return string table name |
|
51 | - */ |
|
52 | - public static function getTableName(){ |
|
53 | - return self::$tableName; |
|
54 | - } |
|
46 | + /** |
|
47 | + * Get table name |
|
48 | + * |
|
49 | + * @param void |
|
50 | + * @return string table name |
|
51 | + */ |
|
52 | + public static function getTableName(){ |
|
53 | + return self::$tableName; |
|
54 | + } |
|
55 | 55 | |
56 | - /** |
|
57 | - * Returns the propertys of the object as array |
|
58 | - * |
|
59 | - * @return array |
|
60 | - */ |
|
61 | - public function getRow() { |
|
62 | - return $this->row; |
|
63 | - } |
|
56 | + /** |
|
57 | + * Returns the propertys of the object as array |
|
58 | + * |
|
59 | + * @return array |
|
60 | + */ |
|
61 | + public function getRow() { |
|
62 | + return $this->row; |
|
63 | + } |
|
64 | 64 | |
65 | 65 | |
66 | 66 | } |
@@ -49,7 +49,7 @@ |
||
49 | 49 | * @param void |
50 | 50 | * @return string table name |
51 | 51 | */ |
52 | - public static function getTableName(){ |
|
52 | + public static function getTableName() { |
|
53 | 53 | return self::$tableName; |
54 | 54 | } |
55 | 55 |
@@ -24,19 +24,19 @@ |
||
24 | 24 | |
25 | 25 | class tx_crawler_domain_queue_entry extends tx_crawler_domain_lib_abstract_dbobject { |
26 | 26 | |
27 | - /** |
|
28 | - * @var string table name |
|
29 | - */ |
|
30 | - protected static $tableName = 'tx_crawler_queue'; |
|
27 | + /** |
|
28 | + * @var string table name |
|
29 | + */ |
|
30 | + protected static $tableName = 'tx_crawler_queue'; |
|
31 | 31 | |
32 | - /** |
|
33 | - * Returns the execution time of the record as int value |
|
34 | - * |
|
35 | - * @return int |
|
36 | - */ |
|
37 | - public function getExecutionTime(){ |
|
38 | - return $this->row['exec_time']; |
|
39 | - } |
|
32 | + /** |
|
33 | + * Returns the execution time of the record as int value |
|
34 | + * |
|
35 | + * @return int |
|
36 | + */ |
|
37 | + public function getExecutionTime(){ |
|
38 | + return $this->row['exec_time']; |
|
39 | + } |
|
40 | 40 | |
41 | 41 | } |
42 | 42 |
@@ -34,7 +34,7 @@ |
||
34 | 34 | * |
35 | 35 | * @return int |
36 | 36 | */ |
37 | - public function getExecutionTime(){ |
|
37 | + public function getExecutionTime() { |
|
38 | 38 | return $this->row['exec_time']; |
39 | 39 | } |
40 | 40 |
@@ -69,7 +69,7 @@ |
||
69 | 69 | { |
70 | 70 | $db = $this->getDB(); |
71 | 71 | $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcess_id(), $this->tableName) . |
72 | - ' AND exec_time > 0 '; |
|
72 | + ' AND exec_time > 0 '; |
|
73 | 73 | $limit = 1; |
74 | 74 | $groupby = ''; |
75 | 75 |
@@ -68,7 +68,7 @@ discard block |
||
68 | 68 | protected function getFirstOrLastObjectByProcess($process, $orderby) |
69 | 69 | { |
70 | 70 | $db = $this->getDB(); |
71 | - $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcess_id(), $this->tableName) . |
|
71 | + $where = 'process_id_completed='.$db->fullQuoteStr($process->getProcess_id(), $this->tableName). |
|
72 | 72 | ' AND exec_time > 0 '; |
73 | 73 | $limit = 1; |
74 | 74 | $groupby = ''; |
@@ -93,7 +93,7 @@ discard block |
||
93 | 93 | */ |
94 | 94 | public function countExecutedItemsByProcess($process) |
95 | 95 | { |
96 | - return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = ' . $this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
96 | + return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
97 | 97 | $this->tableName)); |
98 | 98 | } |
99 | 99 | |
@@ -106,7 +106,7 @@ discard block |
||
106 | 106 | */ |
107 | 107 | public function countNonExecutedItemsByProcess($process) |
108 | 108 | { |
109 | - return $this->countItemsByWhereClause('exec_time = 0 AND process_id = ' . $this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
109 | + return $this->countItemsByWhereClause('exec_time = 0 AND process_id = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
110 | 110 | $this->tableName)); |
111 | 111 | } |
112 | 112 | |
@@ -120,7 +120,7 @@ discard block |
||
120 | 120 | */ |
121 | 121 | public function countAllPendingItems() |
122 | 122 | { |
123 | - return $this->countItemsByWhereClause('exec_time = 0 AND scheduled < ' . time()); |
|
123 | + return $this->countItemsByWhereClause('exec_time = 0 AND scheduled < '.time()); |
|
124 | 124 | } |
125 | 125 | |
126 | 126 | /** |
@@ -133,7 +133,7 @@ discard block |
||
133 | 133 | */ |
134 | 134 | public function countAllAssignedPendingItems() |
135 | 135 | { |
136 | - return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < " . time() . " AND process_id != ''"); |
|
136 | + return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < ".time()." AND process_id != ''"); |
|
137 | 137 | } |
138 | 138 | |
139 | 139 | /** |
@@ -146,7 +146,7 @@ discard block |
||
146 | 146 | */ |
147 | 147 | public function countAllUnassignedPendingItems() |
148 | 148 | { |
149 | - return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < " . time() . " AND process_id = ''"); |
|
149 | + return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < ".time()." AND process_id = ''"); |
|
150 | 150 | } |
151 | 151 | |
152 | 152 | /** |
@@ -176,7 +176,7 @@ discard block |
||
176 | 176 | $res = $db->exec_SELECTquery( |
177 | 177 | "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed", |
178 | 178 | $this->tableName, |
179 | - 'exec_time = 0 AND scheduled < ' . time(), |
|
179 | + 'exec_time = 0 AND scheduled < '.time(), |
|
180 | 180 | 'configuration' |
181 | 181 | ); |
182 | 182 | $rows = array(); |
@@ -200,7 +200,7 @@ discard block |
||
200 | 200 | $res = $db->exec_SELECTquery( |
201 | 201 | 'set_id', |
202 | 202 | $this->tableName, |
203 | - 'exec_time = 0 AND scheduled < ' . time(), |
|
203 | + 'exec_time = 0 AND scheduled < '.time(), |
|
204 | 204 | 'set_id' |
205 | 205 | ); |
206 | 206 | $setIds = array(); |
@@ -226,7 +226,7 @@ discard block |
||
226 | 226 | $res = $db->exec_SELECTquery( |
227 | 227 | 'configuration, count(*) as c', |
228 | 228 | $this->tableName, |
229 | - 'set_id in (' . implode(',', $setIds) . ') AND scheduled < ' . time(), |
|
229 | + 'set_id in ('.implode(',', $setIds).') AND scheduled < '.time(), |
|
230 | 230 | 'configuration' |
231 | 231 | ); |
232 | 232 | while ($row = $db->sql_fetch_assoc($res)) { |
@@ -306,7 +306,7 @@ discard block |
||
306 | 306 | $res = $db->exec_SELECTquery( |
307 | 307 | 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount', |
308 | 308 | $this->tableName, |
309 | - 'exec_time != 0 and exec_time >= ' . intval($start) . ' and exec_time <= ' . intval($end), |
|
309 | + 'exec_time != 0 and exec_time >= '.intval($start).' and exec_time <= '.intval($end), |
|
310 | 310 | 'process_id_completed' |
311 | 311 | ); |
312 | 312 |
@@ -24,90 +24,90 @@ |
||
24 | 24 | |
25 | 25 | class tx_crawler_domain_reason extends tx_crawler_domain_lib_abstract_dbobject { |
26 | 26 | |
27 | - protected static $tableName = 'tx_crawler_reason'; |
|
27 | + protected static $tableName = 'tx_crawler_reason'; |
|
28 | 28 | |
29 | - /** |
|
30 | - * THE CONSTANTS REPRESENT THE KIND OF THE REASON |
|
31 | - * |
|
32 | - * Convention for own states: <extensionkey>_<reason> |
|
33 | - */ |
|
34 | - const REASON_DEFAULT = 'crawler_default_reason'; |
|
35 | - const REASON_GUI_SUBMIT = 'crawler_gui_submit_reason'; |
|
36 | - const REASON_CLI_SUBMIT = 'crawler_cli_submit_reason'; |
|
29 | + /** |
|
30 | + * THE CONSTANTS REPRESENT THE KIND OF THE REASON |
|
31 | + * |
|
32 | + * Convention for own states: <extensionkey>_<reason> |
|
33 | + */ |
|
34 | + const REASON_DEFAULT = 'crawler_default_reason'; |
|
35 | + const REASON_GUI_SUBMIT = 'crawler_gui_submit_reason'; |
|
36 | + const REASON_CLI_SUBMIT = 'crawler_cli_submit_reason'; |
|
37 | 37 | |
38 | - /** |
|
39 | - * Set uid |
|
40 | - * |
|
41 | - * @param int uid |
|
42 | - * @return void |
|
43 | - */ |
|
44 | - public function setUid($uid) { |
|
45 | - $this->row['uid'] = $uid; |
|
46 | - } |
|
38 | + /** |
|
39 | + * Set uid |
|
40 | + * |
|
41 | + * @param int uid |
|
42 | + * @return void |
|
43 | + */ |
|
44 | + public function setUid($uid) { |
|
45 | + $this->row['uid'] = $uid; |
|
46 | + } |
|
47 | 47 | |
48 | - /** |
|
49 | - * Method to set a timestamp for the creation time of this record |
|
50 | - * |
|
51 | - * @param int $time |
|
52 | - */ |
|
53 | - public function setCreationDate($time) { |
|
54 | - $this->row['crdate'] = $time; |
|
55 | - } |
|
48 | + /** |
|
49 | + * Method to set a timestamp for the creation time of this record |
|
50 | + * |
|
51 | + * @param int $time |
|
52 | + */ |
|
53 | + public function setCreationDate($time) { |
|
54 | + $this->row['crdate'] = $time; |
|
55 | + } |
|
56 | 56 | |
57 | - /** |
|
58 | - * This method can be used to set a user id of the user who has created this reason entry |
|
59 | - * |
|
60 | - * @param int $user_id |
|
61 | - */ |
|
62 | - public function setBackendUserId($user_id) { |
|
63 | - $this->row['cruser_id'] = $user_id; |
|
64 | - } |
|
57 | + /** |
|
58 | + * This method can be used to set a user id of the user who has created this reason entry |
|
59 | + * |
|
60 | + * @param int $user_id |
|
61 | + */ |
|
62 | + public function setBackendUserId($user_id) { |
|
63 | + $this->row['cruser_id'] = $user_id; |
|
64 | + } |
|
65 | 65 | |
66 | - /** |
|
67 | - * Method to set the type of the reason for this reason instance (see constances) |
|
68 | - * |
|
69 | - * @param string $string |
|
70 | - */ |
|
71 | - public function setReason($string) { |
|
72 | - $this->row['reason'] = $string; |
|
73 | - } |
|
66 | + /** |
|
67 | + * Method to set the type of the reason for this reason instance (see constances) |
|
68 | + * |
|
69 | + * @param string $string |
|
70 | + */ |
|
71 | + public function setReason($string) { |
|
72 | + $this->row['reason'] = $string; |
|
73 | + } |
|
74 | 74 | |
75 | - /** |
|
76 | - * This method returns the attached reason text. |
|
77 | - * @return string |
|
78 | - */ |
|
79 | - public function getReason() { |
|
80 | - return $this->row['reason']; |
|
81 | - } |
|
75 | + /** |
|
76 | + * This method returns the attached reason text. |
|
77 | + * @return string |
|
78 | + */ |
|
79 | + public function getReason() { |
|
80 | + return $this->row['reason']; |
|
81 | + } |
|
82 | 82 | |
83 | - /** |
|
84 | - * This method can be used to assign a detail text to the crawler reason |
|
85 | - * |
|
86 | - * @param string $detail_text |
|
87 | - */ |
|
88 | - public function setDetailText($detail_text) { |
|
89 | - $this->row['detail_text'] = $detail_text; |
|
90 | - } |
|
83 | + /** |
|
84 | + * This method can be used to assign a detail text to the crawler reason |
|
85 | + * |
|
86 | + * @param string $detail_text |
|
87 | + */ |
|
88 | + public function setDetailText($detail_text) { |
|
89 | + $this->row['detail_text'] = $detail_text; |
|
90 | + } |
|
91 | 91 | |
92 | - /** |
|
93 | - * Returns the attachet detail text. |
|
94 | - * |
|
95 | - * @param void |
|
96 | - * @return string |
|
97 | - */ |
|
98 | - public function getDetailText() { |
|
99 | - return $this->row['detail_text']; |
|
100 | - } |
|
92 | + /** |
|
93 | + * Returns the attachet detail text. |
|
94 | + * |
|
95 | + * @param void |
|
96 | + * @return string |
|
97 | + */ |
|
98 | + public function getDetailText() { |
|
99 | + return $this->row['detail_text']; |
|
100 | + } |
|
101 | 101 | |
102 | - /** |
|
103 | - * This method is used to set the uid of the queue entry |
|
104 | - * where the reason is relevant for. |
|
105 | - * |
|
106 | - * @param int $entry_uid |
|
107 | - */ |
|
108 | - public function setQueueEntryUid($entry_uid) { |
|
109 | - $this->row['queue_entry_uid'] = $entry_uid; |
|
110 | - } |
|
102 | + /** |
|
103 | + * This method is used to set the uid of the queue entry |
|
104 | + * where the reason is relevant for. |
|
105 | + * |
|
106 | + * @param int $entry_uid |
|
107 | + */ |
|
108 | + public function setQueueEntryUid($entry_uid) { |
|
109 | + $this->row['queue_entry_uid'] = $entry_uid; |
|
110 | + } |
|
111 | 111 | |
112 | 112 | } |
113 | 113 |
@@ -66,7 +66,7 @@ discard block |
||
66 | 66 | $this->tableName, |
67 | 67 | $where, |
68 | 68 | '', |
69 | - htmlspecialchars($orderField) . ' ' . htmlspecialchars($orderDirection), |
|
69 | + htmlspecialchars($orderField).' '.htmlspecialchars($orderDirection), |
|
70 | 70 | self::getLimitFromItemCountAndOffset($itemCount, $offset) |
71 | 71 | ); |
72 | 72 | |
@@ -111,7 +111,7 @@ discard block |
||
111 | 111 | */ |
112 | 112 | public function countNotTimeouted($ttl) |
113 | 113 | { |
114 | - return $this->countByWhere('deleted = 0 AND ttl > ' . intval($ttl)); |
|
114 | + return $this->countByWhere('deleted = 0 AND ttl > '.intval($ttl)); |
|
115 | 115 | } |
116 | 116 | |
117 | 117 | /** |
@@ -127,7 +127,7 @@ discard block |
||
127 | 127 | { |
128 | 128 | $itemCount = filter_var($itemCount, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1, 'default' => 20]]); |
129 | 129 | $offset = filter_var($offset, FILTER_VALIDATE_INT, ['options' => ['min_range' => 0, 'default' => 0]]); |
130 | - $limit = $offset . ', ' . $itemCount; |
|
130 | + $limit = $offset.', '.$itemCount; |
|
131 | 131 | |
132 | 132 | return $limit; |
133 | 133 | } |
@@ -26,7 +26,7 @@ discard block |
||
26 | 26 | * Manages cralwer processes and can be used to start a new process or multiple processes |
27 | 27 | * |
28 | 28 | */ |
29 | -class tx_crawler_domain_process_manager { |
|
29 | +class tx_crawler_domain_process_manager { |
|
30 | 30 | |
31 | 31 | /** |
32 | 32 | * @var $timeToLive integer |
@@ -83,21 +83,21 @@ discard block |
||
83 | 83 | * |
84 | 84 | * @throws RuntimeException |
85 | 85 | */ |
86 | - public function multiProcess( $timeout ) { |
|
86 | + public function multiProcess($timeout) { |
|
87 | 87 | |
88 | 88 | if ($this->processLimit <= 1) { |
89 | - throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL); |
|
89 | + throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.'.PHP_EOL); |
|
90 | 90 | } |
91 | 91 | |
92 | 92 | $pendingItemsStart = $this->queueRepository->countAllPendingItems(); |
93 | 93 | $itemReportLimit = 20; |
94 | - $reportItemCount = $pendingItemsStart - $itemReportLimit; |
|
94 | + $reportItemCount = $pendingItemsStart - $itemReportLimit; |
|
95 | 95 | if ($this->verbose) { |
96 | 96 | $this->reportItemStatus(); |
97 | 97 | } |
98 | 98 | $this->startRequiredProcesses(); |
99 | 99 | $nextTimeOut = time() + $this->timeToLive; |
100 | - for ($i=0; $i<$timeout; $i++) { |
|
100 | + for ($i = 0; $i < $timeout; $i++) { |
|
101 | 101 | $currentPendingItems = $this->queueRepository->countAllPendingItems(); |
102 | 102 | if ($this->startRequiredProcesses($this->verbose)) { |
103 | 103 | $nextTimeOut = time() + $this->timeToLive; |
@@ -112,16 +112,16 @@ discard block |
||
112 | 112 | if ($this->verbose) { |
113 | 113 | $this->reportItemStatus(); |
114 | 114 | } |
115 | - $reportItemCount = $currentPendingItems - $itemReportLimit; |
|
115 | + $reportItemCount = $currentPendingItems - $itemReportLimit; |
|
116 | 116 | } |
117 | 117 | sleep(1); |
118 | 118 | if ($nextTimeOut < time()) { |
119 | - $timedOutProcesses = $this->processRepository->findAll('','DESC',NULL,0,'ttl >'.$nextTimeOut); |
|
119 | + $timedOutProcesses = $this->processRepository->findAll('', 'DESC', NULL, 0, 'ttl >'.$nextTimeOut); |
|
120 | 120 | $nextTimeOut = time() + $this->timeToLive; |
121 | 121 | if ($this->verbose) { |
122 | - echo 'Cleanup'.implode(',',$timedOutProcesses->getProcessIds()).chr(10); |
|
122 | + echo 'Cleanup'.implode(',', $timedOutProcesses->getProcessIds()).chr(10); |
|
123 | 123 | } |
124 | - $this->crawlerObj->CLI_releaseProcesses($timedOutProcesses->getProcessIds(),true); |
|
124 | + $this->crawlerObj->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true); |
|
125 | 125 | } |
126 | 126 | } |
127 | 127 | if ($currentPendingItems > 0 && $this->verbose) { |
@@ -143,17 +143,17 @@ discard block |
||
143 | 143 | */ |
144 | 144 | private function startRequiredProcesses() { |
145 | 145 | $ret = FALSE; |
146 | - $currentProcesses= $this->processRepository->countActive(); |
|
147 | - $availableProcessesCount = $this->processLimit-$currentProcesses; |
|
146 | + $currentProcesses = $this->processRepository->countActive(); |
|
147 | + $availableProcessesCount = $this->processLimit - $currentProcesses; |
|
148 | 148 | $requiredProcessesCount = ceil($this->queueRepository->countAllUnassignedPendingItems() / $this->countInARun); |
149 | - $startProcessCount = min(array($availableProcessesCount,$requiredProcessesCount)); |
|
149 | + $startProcessCount = min(array($availableProcessesCount, $requiredProcessesCount)); |
|
150 | 150 | if ($startProcessCount <= 0) { |
151 | 151 | return $ret; |
152 | 152 | } |
153 | 153 | if ($startProcessCount && $this->verbose) { |
154 | 154 | echo 'Start '.$startProcessCount.' new processes (Running:'.$currentProcesses.')'; |
155 | 155 | } |
156 | - for($i=0;$i<$startProcessCount;$i++) { |
|
156 | + for ($i = 0; $i < $startProcessCount; $i++) { |
|
157 | 157 | usleep(100); |
158 | 158 | if ($this->startProcess()) { |
159 | 159 | if ($this->verbose) { |
@@ -173,25 +173,25 @@ discard block |
||
173 | 173 | * @throws Exception if no crawlerprocess was started |
174 | 174 | */ |
175 | 175 | public function startProcess() { |
176 | - $ttl = (time() + $this->timeToLive -1); |
|
176 | + $ttl = (time() + $this->timeToLive - 1); |
|
177 | 177 | $current = $this->processRepository->countNotTimeouted($ttl); |
178 | 178 | // Check whether OS is Windows |
179 | 179 | if (TYPO3_OS === 'WIN') { |
180 | - $sCompletePath = escapeshellcmd('start ' . $this->getCrawlerCliPath()); |
|
180 | + $sCompletePath = escapeshellcmd('start '.$this->getCrawlerCliPath()); |
|
181 | 181 | $oFileHandler = popen($sCompletePath, 'r'); |
182 | 182 | if ($oFileHandler !== false) { |
183 | 183 | pclose($oFileHandler); |
184 | 184 | } |
185 | 185 | } |
186 | 186 | else { |
187 | - $sCompletePath = '(' .escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null'; |
|
187 | + $sCompletePath = '('.escapeshellcmd($this->getCrawlerCliPath()).' &) > /dev/null'; |
|
188 | 188 | $oFileHandler = system($sCompletePath); |
189 | 189 | } |
190 | 190 | if ($oFileHandler === false) { |
191 | 191 | throw new Exception('could not start process!'); |
192 | 192 | } |
193 | 193 | else { |
194 | - for ($i=0;$i<10;$i++) { |
|
194 | + for ($i = 0; $i < 10; $i++) { |
|
195 | 195 | if ($this->processRepository->countNotTimeouted($ttl) > $current) { |
196 | 196 | return true; |
197 | 197 | } |
@@ -206,10 +206,10 @@ discard block |
||
206 | 206 | * |
207 | 207 | * @return string |
208 | 208 | */ |
209 | - public function getCrawlerCliPath(){ |
|
210 | - $phpPath = $this->crawlerObj->extensionSettings['phpPath'] . ' '; |
|
209 | + public function getCrawlerCliPath() { |
|
210 | + $phpPath = $this->crawlerObj->extensionSettings['phpPath'].' '; |
|
211 | 211 | $pathToTypo3 = rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/'); |
212 | - $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
212 | + $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
213 | 213 | $cliPart = '/typo3/cli_dispatch.phpsh crawler'; |
214 | 214 | |
215 | 215 | $scriptPath = $phpPath.$pathToTypo3.$cliPart; |
@@ -182,15 +182,13 @@ |
||
182 | 182 | if ($oFileHandler !== false) { |
183 | 183 | pclose($oFileHandler); |
184 | 184 | } |
185 | - } |
|
186 | - else { |
|
185 | + } else { |
|
187 | 186 | $sCompletePath = '(' .escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null'; |
188 | 187 | $oFileHandler = system($sCompletePath); |
189 | 188 | } |
190 | 189 | if ($oFileHandler === false) { |
191 | 190 | throw new Exception('could not start process!'); |
192 | - } |
|
193 | - else { |
|
191 | + } else { |
|
194 | 192 | for ($i=0;$i<10;$i++) { |
195 | 193 | if ($this->processRepository->countNotTimeouted($ttl) > $current) { |
196 | 194 | return true; |