@@ -66,10 +66,10 @@ |
||
66 | 66 | if ($this->isCrawlerExtensionRunning($frontend) && preg_match('#^/index.php\?&?id=(\d+)(&.*)?$#', $uri, $matches)) { |
67 | 67 | $speakingUri = $frontend->cObj->typoLink_URL(array('parameter' => $matches[1], 'additionalParams' => $matches[2])); |
68 | 68 | $speakingUriParts = parse_url($speakingUri); |
69 | - if(false === $speakingUriParts){ |
|
70 | - throw new \Exception('Could not parse URI: ' . $speakingUri, 1289915976); |
|
69 | + if (false === $speakingUriParts) { |
|
70 | + throw new \Exception('Could not parse URI: '.$speakingUri, 1289915976); |
|
71 | 71 | } |
72 | - $speakingUrlPath = '/' . ltrim($speakingUriParts['path'], '/'); |
|
72 | + $speakingUrlPath = '/'.ltrim($speakingUriParts['path'], '/'); |
|
73 | 73 | // Don't change anything if speaking URL is part of old URI: |
74 | 74 | // (it might be the case that using the speaking URL failed) |
75 | 75 | if (strpos($uri, $speakingUrlPath) !== 0 || $speakingUrlPath === '/') { |
@@ -92,7 +92,7 @@ discard block |
||
92 | 92 | $this->startPage = 0; |
93 | 93 | } |
94 | 94 | |
95 | - $_SERVER['argv'] = array($_SERVER['argv'][0], $this->startPage,'-ss', '-d', $this->depth, '-o', self::MODE, '-conf', implode(',', $this->configuration)); |
|
95 | + $_SERVER['argv'] = array($_SERVER['argv'][0], $this->startPage, '-ss', '-d', $this->depth, '-o', self::MODE, '-conf', implode(',', $this->configuration)); |
|
96 | 96 | } |
97 | 97 | |
98 | 98 | |
@@ -109,6 +109,6 @@ discard block |
||
109 | 109 | $this->startPage = 0; |
110 | 110 | } |
111 | 111 | |
112 | - return implode(',', $this->configuration) . ' (depth: ' . $this->depth . ', startPage:' . $this->startPage . ')'; |
|
112 | + return implode(',', $this->configuration).' (depth: '.$this->depth.', startPage:'.$this->startPage.')'; |
|
113 | 113 | } |
114 | 114 | } |
115 | 115 | \ No newline at end of file |
@@ -37,7 +37,7 @@ discard block |
||
37 | 37 | * @return tx_crawler_domain_queue_entry $entry |
38 | 38 | */ |
39 | 39 | public function findYoungestEntryForProcess(tx_crawler_domain_process $process) { |
40 | - return $this->getFirstOrLastObjectByProcess($process,'exec_time ASC'); |
|
40 | + return $this->getFirstOrLastObjectByProcess($process, 'exec_time ASC'); |
|
41 | 41 | } |
42 | 42 | |
43 | 43 | /** |
@@ -47,7 +47,7 @@ discard block |
||
47 | 47 | * @return tx_crawler_domain_queue_entry |
48 | 48 | */ |
49 | 49 | public function findOldestEntryForProcess(tx_crawler_domain_process $process) { |
50 | - return $this->getFirstOrLastObjectByProcess($process,'exec_time DESC'); |
|
50 | + return $this->getFirstOrLastObjectByProcess($process, 'exec_time DESC'); |
|
51 | 51 | } |
52 | 52 | |
53 | 53 | |
@@ -60,15 +60,15 @@ discard block |
||
60 | 60 | */ |
61 | 61 | protected function getFirstOrLastObjectByProcess($process, $orderby) { |
62 | 62 | $db = $this->getDB(); |
63 | - $where = 'process_id_completed='.$db->fullQuoteStr($process->getProcess_id(),$this->tableName). |
|
63 | + $where = 'process_id_completed='.$db->fullQuoteStr($process->getProcess_id(), $this->tableName). |
|
64 | 64 | ' AND exec_time > 0 '; |
65 | 65 | $limit = 1; |
66 | 66 | $groupby = ''; |
67 | 67 | |
68 | - $res = $db->exec_SELECTgetRows('*','tx_crawler_queue',$where,$groupby,$orderby,$limit); |
|
69 | - if($res) { |
|
70 | - $first = $res[0]; |
|
71 | - }else{ |
|
68 | + $res = $db->exec_SELECTgetRows('*', 'tx_crawler_queue', $where, $groupby, $orderby, $limit); |
|
69 | + if ($res) { |
|
70 | + $first = $res[0]; |
|
71 | + } else { |
|
72 | 72 | $first = array(); |
73 | 73 | } |
74 | 74 | $resultObject = new tx_crawler_domain_queue_entry($first); |
@@ -83,7 +83,7 @@ discard block |
||
83 | 83 | */ |
84 | 84 | public function countExtecutedItemsByProcess($process) { |
85 | 85 | |
86 | - return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = '.$this->getDB()->fullQuoteStr($process->getProcess_id(),$this->tableName)); |
|
86 | + return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), $this->tableName)); |
|
87 | 87 | } |
88 | 88 | |
89 | 89 | /** |
@@ -93,7 +93,7 @@ discard block |
||
93 | 93 | * @return int |
94 | 94 | */ |
95 | 95 | public function countNonExecutedItemsByProcess($process) { |
96 | - return $this->countItemsByWhereClause('exec_time = 0 AND process_id = '.$this->getDB()->fullQuoteStr($process->getProcess_id(),$this->tableName)); |
|
96 | + return $this->countItemsByWhereClause('exec_time = 0 AND process_id = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), $this->tableName)); |
|
97 | 97 | } |
98 | 98 | |
99 | 99 | /** |
@@ -135,8 +135,8 @@ discard block |
||
135 | 135 | */ |
136 | 136 | protected function countItemsByWhereClause($where) { |
137 | 137 | $db = $this->getDB(); |
138 | - $rs = $db->exec_SELECTquery('count(*) as anz',$this->tableName,$where); |
|
139 | - $res = $db->sql_fetch_assoc($rs); |
|
138 | + $rs = $db->exec_SELECTquery('count(*) as anz', $this->tableName, $where); |
|
139 | + $res = $db->sql_fetch_assoc($rs); |
|
140 | 140 | |
141 | 141 | return $res['anz']; |
142 | 142 | } |
@@ -202,7 +202,7 @@ discard block |
||
202 | 202 | $res = $db->exec_SELECTquery( |
203 | 203 | 'configuration, count(*) as c', |
204 | 204 | $this->tableName, |
205 | - 'set_id in ('. implode(',',$setIds).') AND scheduled < '.time(), |
|
205 | + 'set_id in ('.implode(',', $setIds).') AND scheduled < '.time(), |
|
206 | 206 | 'configuration' |
207 | 207 | ); |
208 | 208 | while ($row = $db->sql_fetch_assoc($res)) { |
@@ -247,7 +247,7 @@ discard block |
||
247 | 247 | * @author Fabrizio Branca <[email protected]> |
248 | 248 | * @since 2010-11-16 |
249 | 249 | */ |
250 | - public function getLastProcessedEntries($selectFields='*', $limit='100') { |
|
250 | + public function getLastProcessedEntries($selectFields = '*', $limit = '100') { |
|
251 | 251 | $db = $this->getDB(); |
252 | 252 | $res = $db->exec_SELECTquery( |
253 | 253 | $selectFields, |
@@ -279,7 +279,7 @@ discard block |
||
279 | 279 | $res = $db->exec_SELECTquery( |
280 | 280 | 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount', |
281 | 281 | $this->tableName, |
282 | - 'exec_time != 0 and exec_time >= '.intval($start). ' and exec_time <= ' . intval($end), |
|
282 | + 'exec_time != 0 and exec_time >= '.intval($start).' and exec_time <= '.intval($end), |
|
283 | 283 | 'process_id_completed' |
284 | 284 | ); |
285 | 285 |
@@ -36,7 +36,7 @@ |
||
36 | 36 | * @author Timo Schmidt <[email protected]> |
37 | 37 | * @return int |
38 | 38 | */ |
39 | - public function getExecutionTime(){ |
|
39 | + public function getExecutionTime() { |
|
40 | 40 | return $this->row['exec_time']; |
41 | 41 | } |
42 | 42 |
@@ -59,12 +59,12 @@ discard block |
||
59 | 59 | $this->tableName, |
60 | 60 | $where, |
61 | 61 | '', |
62 | - htmlspecialchars($orderField) . ' ' . htmlspecialchars($orderDirection), |
|
62 | + htmlspecialchars($orderField).' '.htmlspecialchars($orderDirection), |
|
63 | 63 | self::getLimitFromItemCountAndOffset($itemCount, $offset) |
64 | 64 | ); |
65 | 65 | |
66 | 66 | if (is_array($rows)) { |
67 | - foreach($rows as $row) { |
|
67 | + foreach ($rows as $row) { |
|
68 | 68 | $collection->append(\TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance($this->objectClassname, $row)); |
69 | 69 | } |
70 | 70 | } |
@@ -99,7 +99,7 @@ discard block |
||
99 | 99 | * @return integer |
100 | 100 | */ |
101 | 101 | public function countNotTimeouted($ttl) { |
102 | - return $this->countByWhere('deleted = 0 AND ttl > ' . intval($ttl)); |
|
102 | + return $this->countByWhere('deleted = 0 AND ttl > '.intval($ttl)); |
|
103 | 103 | } |
104 | 104 | |
105 | 105 | /** |
@@ -113,7 +113,7 @@ discard block |
||
113 | 113 | public static function getLimitFromItemCountAndOffset($itemCount, $offset) { |
114 | 114 | $itemCount = filter_var($itemCount, FILTER_VALIDATE_INT, array('options' => array('min_range' => 1, 'default' => 20))); |
115 | 115 | $offset = filter_var($offset, FILTER_VALIDATE_INT, array('options' => array('min_range' => 0, 'default' => 0))); |
116 | - $limit = $offset . ', ' . $itemCount; |
|
116 | + $limit = $offset.', '.$itemCount; |
|
117 | 117 | |
118 | 118 | return $limit; |
119 | 119 | } |
@@ -151,7 +151,7 @@ |
||
151 | 151 | // TODO: use class constants for these states |
152 | 152 | if ($this->getActive() && $this->getProgress() < 100) { |
153 | 153 | $stage = 'running'; |
154 | - } elseif(!$this->getActive() && $this->getProgress() < 100) { |
|
154 | + } elseif (!$this->getActive() && $this->getProgress() < 100) { |
|
155 | 155 | $stage = 'cancelled'; |
156 | 156 | } else { |
157 | 157 | $stage = 'completed'; |
@@ -47,7 +47,7 @@ |
||
47 | 47 | public function execute() |
48 | 48 | { |
49 | 49 | $processManager = new \tx_crawler_domain_process_manager(); |
50 | - $timeout = is_int($this->timeOut) ? (int)$this->timeOut : 1800; |
|
50 | + $timeout = is_int($this->timeOut) ? (int) $this->timeOut : 1800; |
|
51 | 51 | |
52 | 52 | try { |
53 | 53 | $processManager->multiProcess($timeout); |
@@ -4,10 +4,10 @@ |
||
4 | 4 | } |
5 | 5 | |
6 | 6 | $processManager = new tx_crawler_domain_process_manager(); |
7 | -$timeout = isset($_SERVER['argv'][1] ) ? intval($_SERVER['argv'][1]) : 1800; |
|
7 | +$timeout = isset($_SERVER['argv'][1]) ? intval($_SERVER['argv'][1]) : 1800; |
|
8 | 8 | |
9 | 9 | try { |
10 | 10 | $processManager->multiProcess($timeout); |
11 | 11 | } catch (Exception $e) { |
12 | - echo PHP_EOL . $e->getMessage(); |
|
12 | + echo PHP_EOL.$e->getMessage(); |
|
13 | 13 | } |
@@ -26,7 +26,7 @@ discard block |
||
26 | 26 | * Manages crawler processes and can be used to start a new process or multiple processes |
27 | 27 | * |
28 | 28 | */ |
29 | -class tx_crawler_domain_process_manager { |
|
29 | +class tx_crawler_domain_process_manager { |
|
30 | 30 | /** |
31 | 31 | * @var $timeToLive integer |
32 | 32 | */ |
@@ -65,8 +65,8 @@ discard block |
||
65 | 65 | * the constructor |
66 | 66 | */ |
67 | 67 | public function __construct() { |
68 | - $this->processRepository = new tx_crawler_domain_process_repository(); |
|
69 | - $this->queueRepository = new tx_crawler_domain_queue_repository(); |
|
68 | + $this->processRepository = new tx_crawler_domain_process_repository(); |
|
69 | + $this->queueRepository = new tx_crawler_domain_queue_repository(); |
|
70 | 70 | $this->crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
71 | 71 | $this->timeToLive = intval($this->crawlerObj->extensionSettings['processMaxRunTime']); |
72 | 72 | $this->countInARun = intval($this->crawlerObj->extensionSettings['countInARun']); |
@@ -81,7 +81,7 @@ discard block |
||
81 | 81 | */ |
82 | 82 | public function multiProcess($timeout) { |
83 | 83 | if ($this->processLimit <= 1) { |
84 | - throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.' . PHP_EOL); |
|
84 | + throw new RuntimeException('To run crawler in multi process mode you have to configure the processLimit > 1.'.PHP_EOL); |
|
85 | 85 | } |
86 | 86 | |
87 | 87 | $pendingItemsStart = $this->queueRepository->countAllPendingItems(); |
@@ -99,7 +99,7 @@ discard block |
||
99 | 99 | } |
100 | 100 | if ($currentPendingItems == 0) { |
101 | 101 | if ($this->verbose) { |
102 | - echo 'Finished...' . chr(10); |
|
102 | + echo 'Finished...'.chr(10); |
|
103 | 103 | } |
104 | 104 | break; |
105 | 105 | } |
@@ -111,16 +111,16 @@ discard block |
||
111 | 111 | } |
112 | 112 | sleep(1); |
113 | 113 | if ($nextTimeOut < time()) { |
114 | - $timedOutProcesses = $this->processRepository->findAll('', 'DESC', NULL, 0, 'ttl >' . $nextTimeOut); |
|
114 | + $timedOutProcesses = $this->processRepository->findAll('', 'DESC', NULL, 0, 'ttl >'.$nextTimeOut); |
|
115 | 115 | $nextTimeOut = time() + $this->timeToLive; |
116 | 116 | if ($this->verbose) { |
117 | - echo 'Cleanup' . implode(',', $timedOutProcesses->getProcessIds()) . chr(10); |
|
117 | + echo 'Cleanup'.implode(',', $timedOutProcesses->getProcessIds()).chr(10); |
|
118 | 118 | } |
119 | 119 | $this->crawlerObj->CLI_releaseProcesses($timedOutProcesses->getProcessIds(), true); |
120 | 120 | } |
121 | 121 | } |
122 | 122 | if ($currentPendingItems > 0 && $this->verbose) { |
123 | - echo 'Stop with timeout' . chr(10); |
|
123 | + echo 'Stop with timeout'.chr(10); |
|
124 | 124 | } |
125 | 125 | } |
126 | 126 | |
@@ -138,17 +138,17 @@ discard block |
||
138 | 138 | */ |
139 | 139 | private function startRequiredProcesses() { |
140 | 140 | $ret = FALSE; |
141 | - $currentProcesses= $this->processRepository->countActive(); |
|
142 | - $availableProcessesCount = $this->processLimit-$currentProcesses; |
|
141 | + $currentProcesses = $this->processRepository->countActive(); |
|
142 | + $availableProcessesCount = $this->processLimit - $currentProcesses; |
|
143 | 143 | $requiredProcessesCount = ceil($this->queueRepository->countAllUnassignedPendingItems() / $this->countInARun); |
144 | - $startProcessCount = min(array($availableProcessesCount,$requiredProcessesCount)); |
|
144 | + $startProcessCount = min(array($availableProcessesCount, $requiredProcessesCount)); |
|
145 | 145 | if ($startProcessCount <= 0) { |
146 | 146 | return $ret; |
147 | 147 | } |
148 | 148 | if ($startProcessCount && $this->verbose) { |
149 | 149 | echo 'Start '.$startProcessCount.' new processes (Running:'.$currentProcesses.')'; |
150 | 150 | } |
151 | - for($i=0;$i<$startProcessCount;$i++) { |
|
151 | + for ($i = 0; $i < $startProcessCount; $i++) { |
|
152 | 152 | usleep(100); |
153 | 153 | if ($this->startProcess()) { |
154 | 154 | if ($this->verbose) { |
@@ -168,14 +168,14 @@ discard block |
||
168 | 168 | * @throws Exception if no crawlerprocess was started |
169 | 169 | */ |
170 | 170 | public function startProcess() { |
171 | - $ttl = (time() + $this->timeToLive -1); |
|
171 | + $ttl = (time() + $this->timeToLive - 1); |
|
172 | 172 | $current = $this->processRepository->countNotTimeouted($ttl); |
173 | - $completePath = '(' .escapeshellcmd($this->getCrawlerCliPath()) . ' &) > /dev/null'; |
|
173 | + $completePath = '('.escapeshellcmd($this->getCrawlerCliPath()).' &) > /dev/null'; |
|
174 | 174 | if (system($completePath) === FALSE) { |
175 | 175 | throw new Exception('could not start process!'); |
176 | 176 | } |
177 | 177 | else { |
178 | - for ($i=0;$i<10;$i++) { |
|
178 | + for ($i = 0; $i < 10; $i++) { |
|
179 | 179 | if ($this->processRepository->countNotTimeouted($ttl) > $current) { |
180 | 180 | return true; |
181 | 181 | } |
@@ -190,10 +190,10 @@ discard block |
||
190 | 190 | * |
191 | 191 | * @return string |
192 | 192 | */ |
193 | - public function getCrawlerCliPath(){ |
|
194 | - $phpPath = $this->crawlerObj->extensionSettings['phpPath'] . ' '; |
|
195 | - $pathToTypo3 = rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/'); |
|
196 | - $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
193 | + public function getCrawlerCliPath() { |
|
194 | + $phpPath = $this->crawlerObj->extensionSettings['phpPath'].' '; |
|
195 | + $pathToTypo3 = rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_DOCUMENT_ROOT'), '/'); |
|
196 | + $pathToTypo3 .= rtrim(\TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_PATH'), '/'); |
|
197 | 197 | $cliPart = '/typo3/cli_dispatch.phpsh crawler'; |
198 | 198 | return $phpPath.$pathToTypo3.$cliPart; |
199 | 199 | } |