@@ -66,10 +66,10 @@ |
||
| 66 | 66 | if ($this->isCrawlerExtensionRunning($frontend) && preg_match('#^/index.php\?&?id=(\d+)(&.*)?$#', $uri, $matches)) { |
| 67 | 67 | $speakingUri = $frontend->cObj->typoLink_URL(array('parameter' => $matches[1], 'additionalParams' => $matches[2])); |
| 68 | 68 | $speakingUriParts = parse_url($speakingUri); |
| 69 | - if(false === $speakingUriParts){ |
|
| 70 | - throw new \Exception('Could not parse URI: ' . $speakingUri, 1289915976); |
|
| 69 | + if (false === $speakingUriParts) { |
|
| 70 | + throw new \Exception('Could not parse URI: '.$speakingUri, 1289915976); |
|
| 71 | 71 | } |
| 72 | - $speakingUrlPath = '/' . ltrim($speakingUriParts['path'], '/'); |
|
| 72 | + $speakingUrlPath = '/'.ltrim($speakingUriParts['path'], '/'); |
|
| 73 | 73 | // Don't change anything if speaking URL is part of old URI: |
| 74 | 74 | // (it might be the case that using the speaking URL failed) |
| 75 | 75 | if (strpos($uri, $speakingUrlPath) !== 0 || $speakingUrlPath === '/') { |
@@ -92,7 +92,7 @@ discard block |
||
| 92 | 92 | $this->startPage = 0; |
| 93 | 93 | } |
| 94 | 94 | |
| 95 | - $_SERVER['argv'] = array($_SERVER['argv'][0], $this->startPage,'-ss', '-d', $this->depth, '-o', self::MODE, '-conf', implode(',', $this->configuration)); |
|
| 95 | + $_SERVER['argv'] = array($_SERVER['argv'][0], $this->startPage, '-ss', '-d', $this->depth, '-o', self::MODE, '-conf', implode(',', $this->configuration)); |
|
| 96 | 96 | } |
| 97 | 97 | |
| 98 | 98 | |
@@ -109,6 +109,6 @@ discard block |
||
| 109 | 109 | $this->startPage = 0; |
| 110 | 110 | } |
| 111 | 111 | |
| 112 | - return implode(',', $this->configuration) . ' (depth: ' . $this->depth . ', startPage:' . $this->startPage . ')'; |
|
| 112 | + return implode(',', $this->configuration).' (depth: '.$this->depth.', startPage:'.$this->startPage.')'; |
|
| 113 | 113 | } |
| 114 | 114 | } |
| 115 | 115 | \ No newline at end of file |
@@ -37,7 +37,7 @@ discard block |
||
| 37 | 37 | * @return tx_crawler_domain_queue_entry $entry |
| 38 | 38 | */ |
| 39 | 39 | public function findYoungestEntryForProcess(tx_crawler_domain_process $process) { |
| 40 | - return $this->getFirstOrLastObjectByProcess($process,'exec_time ASC'); |
|
| 40 | + return $this->getFirstOrLastObjectByProcess($process, 'exec_time ASC'); |
|
| 41 | 41 | } |
| 42 | 42 | |
| 43 | 43 | /** |
@@ -47,7 +47,7 @@ discard block |
||
| 47 | 47 | * @return tx_crawler_domain_queue_entry |
| 48 | 48 | */ |
| 49 | 49 | public function findOldestEntryForProcess(tx_crawler_domain_process $process) { |
| 50 | - return $this->getFirstOrLastObjectByProcess($process,'exec_time DESC'); |
|
| 50 | + return $this->getFirstOrLastObjectByProcess($process, 'exec_time DESC'); |
|
| 51 | 51 | } |
| 52 | 52 | |
| 53 | 53 | |
@@ -60,15 +60,15 @@ discard block |
||
| 60 | 60 | */ |
| 61 | 61 | protected function getFirstOrLastObjectByProcess($process, $orderby) { |
| 62 | 62 | $db = $this->getDB(); |
| 63 | - $where = 'process_id_completed='.$db->fullQuoteStr($process->getProcess_id(),$this->tableName). |
|
| 63 | + $where = 'process_id_completed='.$db->fullQuoteStr($process->getProcess_id(), $this->tableName). |
|
| 64 | 64 | ' AND exec_time > 0 '; |
| 65 | 65 | $limit = 1; |
| 66 | 66 | $groupby = ''; |
| 67 | 67 | |
| 68 | - $res = $db->exec_SELECTgetRows('*','tx_crawler_queue',$where,$groupby,$orderby,$limit); |
|
| 69 | - if($res) { |
|
| 70 | - $first = $res[0]; |
|
| 71 | - }else{ |
|
| 68 | + $res = $db->exec_SELECTgetRows('*', 'tx_crawler_queue', $where, $groupby, $orderby, $limit); |
|
| 69 | + if ($res) { |
|
| 70 | + $first = $res[0]; |
|
| 71 | + } else { |
|
| 72 | 72 | $first = array(); |
| 73 | 73 | } |
| 74 | 74 | $resultObject = new tx_crawler_domain_queue_entry($first); |
@@ -83,7 +83,7 @@ discard block |
||
| 83 | 83 | */ |
| 84 | 84 | public function countExtecutedItemsByProcess($process) { |
| 85 | 85 | |
| 86 | - return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = '.$this->getDB()->fullQuoteStr($process->getProcess_id(),$this->tableName)); |
|
| 86 | + return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), $this->tableName)); |
|
| 87 | 87 | } |
| 88 | 88 | |
| 89 | 89 | /** |
@@ -93,7 +93,7 @@ discard block |
||
| 93 | 93 | * @return int |
| 94 | 94 | */ |
| 95 | 95 | public function countNonExecutedItemsByProcess($process) { |
| 96 | - return $this->countItemsByWhereClause('exec_time = 0 AND process_id = '.$this->getDB()->fullQuoteStr($process->getProcess_id(),$this->tableName)); |
|
| 96 | + return $this->countItemsByWhereClause('exec_time = 0 AND process_id = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), $this->tableName)); |
|
| 97 | 97 | } |
| 98 | 98 | |
| 99 | 99 | /** |
@@ -135,8 +135,8 @@ discard block |
||
| 135 | 135 | */ |
| 136 | 136 | protected function countItemsByWhereClause($where) { |
| 137 | 137 | $db = $this->getDB(); |
| 138 | - $rs = $db->exec_SELECTquery('count(*) as anz',$this->tableName,$where); |
|
| 139 | - $res = $db->sql_fetch_assoc($rs); |
|
| 138 | + $rs = $db->exec_SELECTquery('count(*) as anz', $this->tableName, $where); |
|
| 139 | + $res = $db->sql_fetch_assoc($rs); |
|
| 140 | 140 | |
| 141 | 141 | return $res['anz']; |
| 142 | 142 | } |
@@ -202,7 +202,7 @@ discard block |
||
| 202 | 202 | $res = $db->exec_SELECTquery( |
| 203 | 203 | 'configuration, count(*) as c', |
| 204 | 204 | $this->tableName, |
| 205 | - 'set_id in ('. implode(',',$setIds).') AND scheduled < '.time(), |
|
| 205 | + 'set_id in ('.implode(',', $setIds).') AND scheduled < '.time(), |
|
| 206 | 206 | 'configuration' |
| 207 | 207 | ); |
| 208 | 208 | while ($row = $db->sql_fetch_assoc($res)) { |
@@ -247,7 +247,7 @@ discard block |
||
| 247 | 247 | * @author Fabrizio Branca <[email protected]> |
| 248 | 248 | * @since 2010-11-16 |
| 249 | 249 | */ |
| 250 | - public function getLastProcessedEntries($selectFields='*', $limit='100') { |
|
| 250 | + public function getLastProcessedEntries($selectFields = '*', $limit = '100') { |
|
| 251 | 251 | $db = $this->getDB(); |
| 252 | 252 | $res = $db->exec_SELECTquery( |
| 253 | 253 | $selectFields, |
@@ -279,7 +279,7 @@ discard block |
||
| 279 | 279 | $res = $db->exec_SELECTquery( |
| 280 | 280 | 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount', |
| 281 | 281 | $this->tableName, |
| 282 | - 'exec_time != 0 and exec_time >= '.intval($start). ' and exec_time <= ' . intval($end), |
|
| 282 | + 'exec_time != 0 and exec_time >= '.intval($start).' and exec_time <= '.intval($end), |
|
| 283 | 283 | 'process_id_completed' |
| 284 | 284 | ); |
| 285 | 285 | |
@@ -36,7 +36,7 @@ |
||
| 36 | 36 | * @author Timo Schmidt <[email protected]> |
| 37 | 37 | * @return int |
| 38 | 38 | */ |
| 39 | - public function getExecutionTime(){ |
|
| 39 | + public function getExecutionTime() { |
|
| 40 | 40 | return $this->row['exec_time']; |
| 41 | 41 | } |
| 42 | 42 | |
@@ -59,12 +59,12 @@ discard block |
||
| 59 | 59 | $this->tableName, |
| 60 | 60 | $where, |
| 61 | 61 | '', |
| 62 | - htmlspecialchars($orderField) . ' ' . htmlspecialchars($orderDirection), |
|
| 62 | + htmlspecialchars($orderField).' '.htmlspecialchars($orderDirection), |
|
| 63 | 63 | self::getLimitFromItemCountAndOffset($itemCount, $offset) |
| 64 | 64 | ); |
| 65 | 65 | |
| 66 | 66 | if (is_array($rows)) { |
| 67 | - foreach($rows as $row) { |
|
| 67 | + foreach ($rows as $row) { |
|
| 68 | 68 | $collection->append(\TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($this->objectClassname, $row)); |
| 69 | 69 | } |
| 70 | 70 | } |
@@ -99,7 +99,7 @@ discard block |
||
| 99 | 99 | * @return integer |
| 100 | 100 | */ |
| 101 | 101 | public function countNotTimeouted($ttl) { |
| 102 | - return $this->countByWhere('deleted = 0 AND ttl > ' . intval($ttl)); |
|
| 102 | + return $this->countByWhere('deleted = 0 AND ttl > '.intval($ttl)); |
|
| 103 | 103 | } |
| 104 | 104 | |
| 105 | 105 | /** |
@@ -113,7 +113,7 @@ discard block |
||
| 113 | 113 | public static function getLimitFromItemCountAndOffset($itemCount, $offset) { |
| 114 | 114 | $itemCount = filter_var($itemCount, FILTER_VALIDATE_INT, array('options' => array('min_range' => 1, 'default' => 20))); |
| 115 | 115 | $offset = filter_var($offset, FILTER_VALIDATE_INT, array('options' => array('min_range' => 0, 'default' => 0))); |
| 116 | - $limit = $offset . ', ' . $itemCount; |
|
| 116 | + $limit = $offset.', '.$itemCount; |
|
| 117 | 117 | |
| 118 | 118 | return $limit; |
| 119 | 119 | } |
@@ -151,7 +151,7 @@ |
||
| 151 | 151 | // TODO: use class constants for these states |
| 152 | 152 | if ($this->getActive() && $this->getProgress() < 100) { |
| 153 | 153 | $stage = 'running'; |
| 154 | - } elseif(!$this->getActive() && $this->getProgress() < 100) { |
|
| 154 | + } elseif (!$this->getActive() && $this->getProgress() < 100) { |
|
| 155 | 155 | $stage = 'cancelled'; |
| 156 | 156 | } else { |
| 157 | 157 | $stage = 'completed'; |
@@ -47,7 +47,7 @@ |
||
| 47 | 47 | public function execute() |
| 48 | 48 | { |
| 49 | 49 | $processManager = new \tx_crawler_domain_process_manager(); |
| 50 | - $timeout = is_int($this->timeOut) ? (int)$this->timeOut : 1800; |
|
| 50 | + $timeout = is_int($this->timeOut) ? (int) $this->timeOut : 1800; |
|
| 51 | 51 | |
| 52 | 52 | try { |
| 53 | 53 | $processManager->multiProcess($timeout); |
@@ -4,10 +4,10 @@ |
||
| 4 | 4 | } |
| 5 | 5 | |
| 6 | 6 | $processManager = new tx_crawler_domain_process_manager(); |
| 7 | -$timeout = isset($_SERVER['argv'][1] ) ? intval($_SERVER['argv'][1]) : 1800; |
|
| 7 | +$timeout = isset($_SERVER['argv'][1]) ? intval($_SERVER['argv'][1]) : 1800; |
|
| 8 | 8 | |
| 9 | 9 | try { |
| 10 | 10 | $processManager->multiProcess($timeout); |
| 11 | 11 | } catch (Exception $e) { |
| 12 | - echo PHP_EOL . $e->getMessage(); |
|
| 12 | + echo PHP_EOL.$e->getMessage(); |
|
| 13 | 13 | } |
@@ -26,7 +26,7 @@ discard block |
||
| 26 | 26 | * Manages crawler processes and can be used to start a new process or multiple processes |
| 27 | 27 | * |
| 28 | 28 | */ |
| 29 | -class tx_crawler_domain_process_manager { |
|
| 29 | +class tx_crawler_domain_process_manager { |
|
| 30 | 30 | /** |
| 31 | 31 | * @var $timeToLive integer |
| 32 | 32 | */ |
@@ -65,8 +65,8 @@ discard block |
||
| 65 | 65 | * the constructor |
| 66 | 66 | */ |
| 67 | 67 | public function __construct() { |
| 68 | - $this->processRepository = new tx_crawler_domain_process_repository(); |
|
| 69 | - $this->queueRepository = new tx_crawler_domain_queue_repository(); |
|
| 68 | + $this->processRepository = new tx_crawler_domain_process_repository(); |
|
| 69 | + $this->queueRepository = new tx_crawler_domain_queue_repository(); |
|
| 70 | 70 | $this->crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
| 71 | 71 | $this->timeToLive = intval($this->crawlerObj->extensionSettings['processMaxRunTime']); |
| 72 | 72 | $this->countInARun = intval($this->crawlerObj->extensionSettings['countInARun']); |
@@ -81,7 +81,7 @@ discard block |
||
| 81 | 81 | */ |
| 82 | 82 | public function multiProcess($timeout) { |
| 83 | 83 | if ($this->processLimit <= 1) { |
| 84 | - throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL); |
|
| 84 | + throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.'.PHP_EOL); |
|
| 85 | 85 | } |
| 86 | 86 | |
| 87 | 87 | $pendingItemsStart = $this->queueRepository->countAllPendingItems(); |
@@ -99,7 +99,7 @@ discard block |
||
| 99 | 99 | } |
| 100 | 100 | if ($currentPendingItems == 0) { |
| 101 | 101 | if ($this->verbose) { |
| 102 | - echo 'Finished...' . chr(10); |
|
| 102 | + echo 'Finished...'.chr(10); |
|
| 103 | 103 | } |
| 104 | 104 | break; |
| 105 | 105 | } |
@@ -111,16 +111,16 @@ discard block |
||
| 111 | 111 | } |
| 112 | 112 | sleep(1); |
| 113 | 113 | if ($nextTimeOut < time()) { |
| 114 | - $timedOutProcesses = $this->processRepository->findAll('', 'DESC', NULL, 0, 'ttl >' . $nextTimeOut); |
|
| 114 | + $timedOutProcesses = $this->processRepository->findAll('', 'DESC', NULL, 0, 'ttl >'.$nextTimeOut); |
|
| 115 | 115 | $nextTimeOut = time() + $this->timeToLive; |
| 116 | 116 | if ($this->verbose) { |
| 117 | - echo 'Cleanup' . implode(',', $timedOutProcesses->getProcessIds()) . chr(10); |
|
| 117 | + echo 'Cleanup'.implode(',', $timedOutProcesses->getProcessIds()).chr(10); |
|
| 118 | 118 | } |
| 119 | 119 | $this->crawlerObj->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true); |
| 120 | 120 | } |
| 121 | 121 | } |
| 122 | 122 | if ($currentPendingItems > 0 && $this->verbose) { |
| 123 | - echo 'Stop with timeout' . chr(10); |
|
| 123 | + echo 'Stop with timeout'.chr(10); |
|
| 124 | 124 | } |
| 125 | 125 | } |
| 126 | 126 | |
@@ -138,17 +138,17 @@ discard block |
||
| 138 | 138 | */ |
| 139 | 139 | private function startRequiredProcesses() { |
| 140 | 140 | $ret = FALSE; |
| 141 | - $currentProcesses= $this->processRepository->countActive(); |
|
| 142 | - $availableProcessesCount = $this->processLimit-$currentProcesses; |
|
| 141 | + $currentProcesses = $this->processRepository->countActive(); |
|
| 142 | + $availableProcessesCount = $this->processLimit - $currentProcesses; |
|
| 143 | 143 | $requiredProcessesCount = ceil($this->queueRepository->countAllUnassignedPendingItems() / $this->countInARun); |
| 144 | - $startProcessCount = min(array($availableProcessesCount,$requiredProcessesCount)); |
|
| 144 | + $startProcessCount = min(array($availableProcessesCount, $requiredProcessesCount)); |
|
| 145 | 145 | if ($startProcessCount <= 0) { |
| 146 | 146 | return $ret; |
| 147 | 147 | } |
| 148 | 148 | if ($startProcessCount && $this->verbose) { |
| 149 | 149 | echo 'Start '.$startProcessCount.' new processes (Running:'.$currentProcesses.')'; |
| 150 | 150 | } |
| 151 | - for($i=0;$i<$startProcessCount;$i++) { |
|
| 151 | + for ($i = 0; $i < $startProcessCount; $i++) { |
|
| 152 | 152 | usleep(100); |
| 153 | 153 | if ($this->startProcess()) { |
| 154 | 154 | if ($this->verbose) { |
@@ -168,14 +168,14 @@ discard block |
||
| 168 | 168 | * @throws Exception if no crawlerprocess was started |
| 169 | 169 | */ |
| 170 | 170 | public function startProcess() { |
| 171 | - $ttl = (time() + $this->timeToLive -1); |
|
| 171 | + $ttl = (time() + $this->timeToLive - 1); |
|
| 172 | 172 | $current = $this->processRepository->countNotTimeouted($ttl); |
| 173 | - $completePath = '(' .escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null'; |
|
| 173 | + $completePath = '('.escapeshellcmd($this->getCrawlerCliPath()).' &) > /dev/null'; |
|
| 174 | 174 | if (system($completePath) === FALSE) { |
| 175 | 175 | throw new Exception('could not start process!'); |
| 176 | 176 | } |
| 177 | 177 | else { |
| 178 | - for ($i=0;$i<10;$i++) { |
|
| 178 | + for ($i = 0; $i < 10; $i++) { |
|
| 179 | 179 | if ($this->processRepository->countNotTimeouted($ttl) > $current) { |
| 180 | 180 | return true; |
| 181 | 181 | } |
@@ -190,10 +190,10 @@ discard block |
||
| 190 | 190 | * |
| 191 | 191 | * @return string |
| 192 | 192 | */ |
| 193 | - public function getCrawlerCliPath(){ |
|
| 194 | - $phpPath = $this->crawlerObj->extensionSettings['phpPath'] . ' '; |
|
| 195 | - $pathToTypo3 = rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/'); |
|
| 196 | - $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
| 193 | + public function getCrawlerCliPath() { |
|
| 194 | + $phpPath = $this->crawlerObj->extensionSettings['phpPath'].' '; |
|
| 195 | + $pathToTypo3 = rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/'); |
|
| 196 | + $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
| 197 | 197 | $cliPart = '/typo3/cli_dispatch.phpsh crawler'; |
| 198 | 198 | return $phpPath.$pathToTypo3.$cliPart; |
| 199 | 199 | } |