@@ -1,11 +1,11 @@ |
||
| 1 | 1 | <?php if (!defined('TYPO3_MODE')) die ('Access denied.'); ?> |
| 2 | 2 | |
| 3 | 3 | Page: |
| 4 | -<?php for($currentPageOffset = 0; $currentPageOffset < $this->getTotalPagesCount(); $currentPageOffset++ ){ ?> |
|
| 4 | +<?php for ($currentPageOffset = 0; $currentPageOffset < $this->getTotalPagesCount(); $currentPageOffset++) { ?> |
|
| 5 | 5 | <a href="index.php?offset=<?php echo htmlspecialchars($currentPageOffset * $this->getPerPage()); ?>"> |
| 6 | 6 | <?php echo htmlspecialchars($this->getLabelForPageOffset($currentPageOffset)); ?> |
| 7 | 7 | </a> |
| 8 | - <?php if($currentPageOffset+1 < $this->getTotalPagesCount()){ ?> |
|
| 8 | + <?php if ($currentPageOffset + 1 < $this->getTotalPagesCount()) { ?> |
|
| 9 | 9 | | |
| 10 | 10 | <?php } ?> |
| 11 | 11 | |
@@ -5,11 +5,11 @@ |
||
| 5 | 5 | <?php echo $this->getRefreshLink(); ?> |
| 6 | 6 | <?php echo $this->getEnableDisableLink(); ?> |
| 7 | 7 | <?php |
| 8 | - // Check if ActiveProcess is reached |
|
| 9 | - if (\TYPO3\CMS\Core\Utility\MathUtility::convertToPositiveInteger($this->getActiveProcessCount()) < \TYPO3\CMS\Core\Utility\MathUtility::convertToPositiveInteger($this->getMaxActiveProcessCount())) { |
|
| 10 | - echo $this->getAddLink(); |
|
| 11 | - } |
|
| 12 | - ?> |
|
| 8 | + // Check if ActiveProcess is reached |
|
| 9 | + if (\TYPO3\CMS\Core\Utility\MathUtility::convertToPositiveInteger($this->getActiveProcessCount()) < \TYPO3\CMS\Core\Utility\MathUtility::convertToPositiveInteger($this->getMaxActiveProcessCount())) { |
|
| 10 | + echo $this->getAddLink(); |
|
| 11 | + } |
|
| 12 | + ?> |
|
| 13 | 13 | <?php echo $this->getModeLink(); ?> |
| 14 | 14 | </div> |
| 15 | 15 | |
@@ -50,17 +50,17 @@ |
||
| 50 | 50 | </tr> |
| 51 | 51 | </thead> |
| 52 | 52 | <tbody> |
| 53 | - <?php foreach($this->getProcessCollection() as $process): /* @var $process tx_crawler_domain_process */ ?> |
|
| 54 | - <tr class="<?php echo (++$count % 2 == 0) ? 'odd': 'even' ?>"> |
|
| 53 | + <?php foreach ($this->getProcessCollection() as $process): /* @var $process tx_crawler_domain_process */ ?> |
|
| 54 | + <tr class="<?php echo (++$count % 2 == 0) ? 'odd' : 'even' ?>"> |
|
| 55 | 55 | <td><?php echo $this->getIconForState(htmlspecialchars($process->getState())); ?></td> |
| 56 | 56 | <td><?php echo htmlspecialchars($process->getProcess_id()); ?></td> |
| 57 | 57 | <td><?php echo htmlspecialchars($this->asDate($process->getTimeForFirstItem())); ?></td> |
| 58 | 58 | <td><?php echo htmlspecialchars($this->asDate($process->getTimeForLastItem())); ?></td> |
| 59 | - <td><?php echo htmlspecialchars(floor($process->getRuntime()/ 60)); ?> min. <?php echo htmlspecialchars($process->getRuntime()) % 60 ?> sec.</td> |
|
| 59 | + <td><?php echo htmlspecialchars(floor($process->getRuntime() / 60)); ?> min. <?php echo htmlspecialchars($process->getRuntime()) % 60 ?> sec.</td> |
|
| 60 | 60 | <td><?php echo htmlspecialchars($this->asDate($process->getTTL())); ?></td> |
| 61 | 61 | <td><?php echo htmlspecialchars($process->countItemsProcessed()); ?></td> |
| 62 | 62 | <td><?php echo htmlspecialchars($process->countItemsAssigned()); ?></td> |
| 63 | - <td><?php echo htmlspecialchars($process->countItemsToProcess()+$process->countItemsProcessed()); ?></td> |
|
| 63 | + <td><?php echo htmlspecialchars($process->countItemsToProcess() + $process->countItemsProcessed()); ?></td> |
|
| 64 | 64 | <td> |
| 65 | 65 | <?php if ($process->getState() == 'running'): ?> |
| 66 | 66 | <div class="crawlerprocessprogress" style="width: 200px;"> |
@@ -1,24 +1,24 @@ |
||
| 1 | 1 | <?php |
| 2 | 2 | $extensionPath = \TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler'); |
| 3 | 3 | return array( |
| 4 | - 'tx_crawler_lib' => $extensionPath . 'class.tx_crawler_lib.php', |
|
| 5 | - 'tx_crawler_cli_flush' => $extensionPath . 'cli/class.tx_crawler_cli_flush.php', |
|
| 6 | - 'tx_crawler_cli' => $extensionPath . 'cli/class.tx_crawler_cli.php', |
|
| 7 | - 'tx_crawler_cli_im' => $extensionPath . 'cli/class.tx_crawler_cli_im.php', |
|
| 8 | - 'tx_crawler_domain_events_dispatcher' => $extensionPath . 'domain/events/class.tx_crawler_domain_events_dispatcher.php', |
|
| 9 | - 'tx_crawler_domain_events_observer' => $extensionPath . 'domain/events/interface.tx_crawler_domain_events_observer.php', |
|
| 10 | - 'tx_crawler_domain_lib_abstract_dbobject' => $extensionPath . 'domain/lib/class.tx_crawler_domain_lib_abstract_dbobject.php', |
|
| 11 | - 'tx_crawler_domain_process_manager' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_manager.php', |
|
| 12 | - 'tx_crawler_domain_process' => $extensionPath . 'domain/process/class.tx_crawler_domain_process.php', |
|
| 13 | - 'tx_crawler_domain_process_collection' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_collection.php', |
|
| 14 | - 'tx_crawler_domain_process_repository' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_repository.php', |
|
| 15 | - 'tx_crawler_domain_queue_entry' => $extensionPath . 'domain/queue/class.tx_crawler_domain_queue_entry.php', |
|
| 16 | - 'tx_crawler_domain_queue_repository' => $extensionPath . 'domain/queue/class.tx_crawler_domain_queue_repository.php', |
|
| 17 | - 'tx_crawler_domain_reason' => $extensionPath . 'domain/reason/class.tx_crawler_domain_reason.php', |
|
| 18 | - 'tx_crawler_hooks_tsfe' => $extensionPath . 'hooks/class.tx_crawler_hooks_tsfe.php', |
|
| 19 | - 'tx_crawler_hooks_staticFileCacheCreateUri' => $extensionPath . 'hooks/class.tx_crawler_hooks_staticFileCacheCreateUri.php', |
|
| 20 | - 'tx_crawler_hooks_processCleanUp' => $extensionPath . 'hooks/class.tx_crawler_hooks_processCleanUp.php', |
|
| 21 | - 'tx_crawler_modfunc1' => $extensionPath . 'modfunc1/class.tx_crawler_modfunc1.php', |
|
| 22 | - 'tx_crawler_view_pagination' => $extensionPath . 'view/class.tx_crawler_view_pagination.php', |
|
| 23 | - 'tx_crawler_view_process_list' => $extensionPath . 'view/process/class.tx_crawler_view_process_list.php', |
|
| 4 | + 'tx_crawler_lib' => $extensionPath . 'class.tx_crawler_lib.php', |
|
| 5 | + 'tx_crawler_cli_flush' => $extensionPath . 'cli/class.tx_crawler_cli_flush.php', |
|
| 6 | + 'tx_crawler_cli' => $extensionPath . 'cli/class.tx_crawler_cli.php', |
|
| 7 | + 'tx_crawler_cli_im' => $extensionPath . 'cli/class.tx_crawler_cli_im.php', |
|
| 8 | + 'tx_crawler_domain_events_dispatcher' => $extensionPath . 'domain/events/class.tx_crawler_domain_events_dispatcher.php', |
|
| 9 | + 'tx_crawler_domain_events_observer' => $extensionPath . 'domain/events/interface.tx_crawler_domain_events_observer.php', |
|
| 10 | + 'tx_crawler_domain_lib_abstract_dbobject' => $extensionPath . 'domain/lib/class.tx_crawler_domain_lib_abstract_dbobject.php', |
|
| 11 | + 'tx_crawler_domain_process_manager' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_manager.php', |
|
| 12 | + 'tx_crawler_domain_process' => $extensionPath . 'domain/process/class.tx_crawler_domain_process.php', |
|
| 13 | + 'tx_crawler_domain_process_collection' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_collection.php', |
|
| 14 | + 'tx_crawler_domain_process_repository' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_repository.php', |
|
| 15 | + 'tx_crawler_domain_queue_entry' => $extensionPath . 'domain/queue/class.tx_crawler_domain_queue_entry.php', |
|
| 16 | + 'tx_crawler_domain_queue_repository' => $extensionPath . 'domain/queue/class.tx_crawler_domain_queue_repository.php', |
|
| 17 | + 'tx_crawler_domain_reason' => $extensionPath . 'domain/reason/class.tx_crawler_domain_reason.php', |
|
| 18 | + 'tx_crawler_hooks_tsfe' => $extensionPath . 'hooks/class.tx_crawler_hooks_tsfe.php', |
|
| 19 | + 'tx_crawler_hooks_staticFileCacheCreateUri' => $extensionPath . 'hooks/class.tx_crawler_hooks_staticFileCacheCreateUri.php', |
|
| 20 | + 'tx_crawler_hooks_processCleanUp' => $extensionPath . 'hooks/class.tx_crawler_hooks_processCleanUp.php', |
|
| 21 | + 'tx_crawler_modfunc1' => $extensionPath . 'modfunc1/class.tx_crawler_modfunc1.php', |
|
| 22 | + 'tx_crawler_view_pagination' => $extensionPath . 'view/class.tx_crawler_view_pagination.php', |
|
| 23 | + 'tx_crawler_view_process_list' => $extensionPath . 'view/process/class.tx_crawler_view_process_list.php', |
|
| 24 | 24 | ); |
@@ -1,24 +1,24 @@ |
||
| 1 | 1 | <?php |
| 2 | 2 | $extensionPath = \TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler'); |
| 3 | 3 | return array( |
| 4 | - 'tx_crawler_lib' => $extensionPath . 'class.tx_crawler_lib.php', |
|
| 5 | - 'tx_crawler_cli_flush' => $extensionPath . 'cli/class.tx_crawler_cli_flush.php', |
|
| 6 | - 'tx_crawler_cli' => $extensionPath . 'cli/class.tx_crawler_cli.php', |
|
| 7 | - 'tx_crawler_cli_im' => $extensionPath . 'cli/class.tx_crawler_cli_im.php', |
|
| 8 | - 'tx_crawler_domain_events_dispatcher' => $extensionPath . 'domain/events/class.tx_crawler_domain_events_dispatcher.php', |
|
| 9 | - 'tx_crawler_domain_events_observer' => $extensionPath . 'domain/events/interface.tx_crawler_domain_events_observer.php', |
|
| 10 | - 'tx_crawler_domain_lib_abstract_dbobject' => $extensionPath . 'domain/lib/class.tx_crawler_domain_lib_abstract_dbobject.php', |
|
| 11 | - 'tx_crawler_domain_process_manager' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_manager.php', |
|
| 12 | - 'tx_crawler_domain_process' => $extensionPath . 'domain/process/class.tx_crawler_domain_process.php', |
|
| 13 | - 'tx_crawler_domain_process_collection' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_collection.php', |
|
| 14 | - 'tx_crawler_domain_process_repository' => $extensionPath . 'domain/process/class.tx_crawler_domain_process_repository.php', |
|
| 15 | - 'tx_crawler_domain_queue_entry' => $extensionPath . 'domain/queue/class.tx_crawler_domain_queue_entry.php', |
|
| 16 | - 'tx_crawler_domain_queue_repository' => $extensionPath . 'domain/queue/class.tx_crawler_domain_queue_repository.php', |
|
| 17 | - 'tx_crawler_domain_reason' => $extensionPath . 'domain/reason/class.tx_crawler_domain_reason.php', |
|
| 18 | - 'tx_crawler_hooks_tsfe' => $extensionPath . 'hooks/class.tx_crawler_hooks_tsfe.php', |
|
| 19 | - 'tx_crawler_hooks_staticFileCacheCreateUri' => $extensionPath . 'hooks/class.tx_crawler_hooks_staticFileCacheCreateUri.php', |
|
| 20 | - 'tx_crawler_hooks_processCleanUp' => $extensionPath . 'hooks/class.tx_crawler_hooks_processCleanUp.php', |
|
| 21 | - 'tx_crawler_modfunc1' => $extensionPath . 'modfunc1/class.tx_crawler_modfunc1.php', |
|
| 22 | - 'tx_crawler_view_pagination' => $extensionPath . 'view/class.tx_crawler_view_pagination.php', |
|
| 23 | - 'tx_crawler_view_process_list' => $extensionPath . 'view/process/class.tx_crawler_view_process_list.php', |
|
| 4 | + 'tx_crawler_lib' => $extensionPath.'class.tx_crawler_lib.php', |
|
| 5 | + 'tx_crawler_cli_flush' => $extensionPath.'cli/class.tx_crawler_cli_flush.php', |
|
| 6 | + 'tx_crawler_cli' => $extensionPath.'cli/class.tx_crawler_cli.php', |
|
| 7 | + 'tx_crawler_cli_im' => $extensionPath.'cli/class.tx_crawler_cli_im.php', |
|
| 8 | + 'tx_crawler_domain_events_dispatcher' => $extensionPath.'domain/events/class.tx_crawler_domain_events_dispatcher.php', |
|
| 9 | + 'tx_crawler_domain_events_observer' => $extensionPath.'domain/events/interface.tx_crawler_domain_events_observer.php', |
|
| 10 | + 'tx_crawler_domain_lib_abstract_dbobject' => $extensionPath.'domain/lib/class.tx_crawler_domain_lib_abstract_dbobject.php', |
|
| 11 | + 'tx_crawler_domain_process_manager' => $extensionPath.'domain/process/class.tx_crawler_domain_process_manager.php', |
|
| 12 | + 'tx_crawler_domain_process' => $extensionPath.'domain/process/class.tx_crawler_domain_process.php', |
|
| 13 | + 'tx_crawler_domain_process_collection' => $extensionPath.'domain/process/class.tx_crawler_domain_process_collection.php', |
|
| 14 | + 'tx_crawler_domain_process_repository' => $extensionPath.'domain/process/class.tx_crawler_domain_process_repository.php', |
|
| 15 | + 'tx_crawler_domain_queue_entry' => $extensionPath.'domain/queue/class.tx_crawler_domain_queue_entry.php', |
|
| 16 | + 'tx_crawler_domain_queue_repository' => $extensionPath.'domain/queue/class.tx_crawler_domain_queue_repository.php', |
|
| 17 | + 'tx_crawler_domain_reason' => $extensionPath.'domain/reason/class.tx_crawler_domain_reason.php', |
|
| 18 | + 'tx_crawler_hooks_tsfe' => $extensionPath.'hooks/class.tx_crawler_hooks_tsfe.php', |
|
| 19 | + 'tx_crawler_hooks_staticFileCacheCreateUri' => $extensionPath.'hooks/class.tx_crawler_hooks_staticFileCacheCreateUri.php', |
|
| 20 | + 'tx_crawler_hooks_processCleanUp' => $extensionPath.'hooks/class.tx_crawler_hooks_processCleanUp.php', |
|
| 21 | + 'tx_crawler_modfunc1' => $extensionPath.'modfunc1/class.tx_crawler_modfunc1.php', |
|
| 22 | + 'tx_crawler_view_pagination' => $extensionPath.'view/class.tx_crawler_view_pagination.php', |
|
| 23 | + 'tx_crawler_view_process_list' => $extensionPath.'view/process/class.tx_crawler_view_process_list.php', |
|
| 24 | 24 | ); |
@@ -1,13 +1,13 @@ |
||
| 1 | 1 | <?php |
| 2 | 2 | if (!defined('TYPO3_cliMode')) { |
| 3 | - die('You cannot run this script directly!'); |
|
| 3 | + die('You cannot run this script directly!'); |
|
| 4 | 4 | } |
| 5 | 5 | |
| 6 | 6 | $processManager = new tx_crawler_domain_process_manager(); |
| 7 | 7 | $timeout = isset($_SERVER['argv'][1] ) ? intval($_SERVER['argv'][1]) : 10000; |
| 8 | 8 | |
| 9 | 9 | try { |
| 10 | - $processManager->multiProcess($timeout); |
|
| 10 | + $processManager->multiProcess($timeout); |
|
| 11 | 11 | } catch (Exception $e) { |
| 12 | - echo PHP_EOL . $e->getMessage(); |
|
| 12 | + echo PHP_EOL . $e->getMessage(); |
|
| 13 | 13 | } |
@@ -4,10 +4,10 @@ |
||
| 4 | 4 | } |
| 5 | 5 | |
| 6 | 6 | $processManager = new tx_crawler_domain_process_manager(); |
| 7 | -$timeout = isset($_SERVER['argv'][1] ) ? intval($_SERVER['argv'][1]) : 10000; |
|
| 7 | +$timeout = isset($_SERVER['argv'][1]) ? intval($_SERVER['argv'][1]) : 10000; |
|
| 8 | 8 | |
| 9 | 9 | try { |
| 10 | 10 | $processManager->multiProcess($timeout); |
| 11 | 11 | } catch (Exception $e) { |
| 12 | - echo PHP_EOL . $e->getMessage(); |
|
| 12 | + echo PHP_EOL.$e->getMessage(); |
|
| 13 | 13 | } |
@@ -37,29 +37,29 @@ |
||
| 37 | 37 | */ |
| 38 | 38 | class tx_crawler_cli extends \TYPO3\CMS\Core\Controller\CommandLineController {
|
| 39 | 39 | |
| 40 | - /** |
|
| 41 | - * Constructor |
|
| 42 | - * |
|
| 43 | - * @return void |
|
| 44 | - */ |
|
| 45 | - function __construct() {
|
|
| 46 | - parent::__construct(); |
|
| 40 | + /** |
|
| 41 | + * Constructor |
|
| 42 | + * |
|
| 43 | + * @return void |
|
| 44 | + */ |
|
| 45 | + function __construct() {
|
|
| 46 | + parent::__construct(); |
|
| 47 | 47 | |
| 48 | - $this->cli_options[] = array('-h', 'Show the help', '');
|
|
| 49 | - $this->cli_options[] = array('--help', 'Same as -h', '');
|
|
| 50 | - $this->cli_options[] = array('--countInARun count', 'Amount of pages', 'How many pages should be crawled during that run.');
|
|
| 51 | - $this->cli_options[] = array('--sleepTime milliseconds', 'Millisecounds to relax system during crawls', 'Amount of millisecounds which the system should use to relax between crawls.');
|
|
| 52 | - $this->cli_options[] = array('--sleepAfterFinish seconds', 'Secounds to relax system after all crawls.', 'Amount of secounds which the system should use to relax after all crawls are done.');
|
|
| 48 | + $this->cli_options[] = array('-h', 'Show the help', '');
|
|
| 49 | + $this->cli_options[] = array('--help', 'Same as -h', '');
|
|
| 50 | + $this->cli_options[] = array('--countInARun count', 'Amount of pages', 'How many pages should be crawled during that run.');
|
|
| 51 | + $this->cli_options[] = array('--sleepTime milliseconds', 'Millisecounds to relax system during crawls', 'Amount of millisecounds which the system should use to relax between crawls.');
|
|
| 52 | + $this->cli_options[] = array('--sleepAfterFinish seconds', 'Secounds to relax system after all crawls.', 'Amount of secounds which the system should use to relax after all crawls are done.');
|
|
| 53 | 53 | |
| 54 | - // Setting help texts: |
|
| 55 | - $this->cli_help['name'] = 'crawler CLI interface -- Crawling the URLs from the queue'; |
|
| 56 | - $this->cli_help['synopsis'] = '###OPTIONS###'; |
|
| 57 | - $this->cli_help['description'] = ""; |
|
| 58 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler\nWill trigger the crawler which starts to process the queue entires\n"; |
|
| 59 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2010'; |
|
| 60 | - } |
|
| 54 | + // Setting help texts: |
|
| 55 | + $this->cli_help['name'] = 'crawler CLI interface -- Crawling the URLs from the queue'; |
|
| 56 | + $this->cli_help['synopsis'] = '###OPTIONS###'; |
|
| 57 | + $this->cli_help['description'] = ""; |
|
| 58 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler\nWill trigger the crawler which starts to process the queue entires\n"; |
|
| 59 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2010'; |
|
| 60 | + } |
|
| 61 | 61 | } |
| 62 | 62 | |
| 63 | 63 | if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']) {
|
| 64 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']); |
|
| 64 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']); |
|
| 65 | 65 | } |
@@ -37,41 +37,41 @@ |
||
| 37 | 37 | */ |
| 38 | 38 | class tx_crawler_cli_im extends \TYPO3\CMS\Core\Controller\CommandLineController {
|
| 39 | 39 | |
| 40 | - /** |
|
| 41 | - * Constructor |
|
| 42 | - * |
|
| 43 | - * @return void |
|
| 44 | - */ |
|
| 45 | - function __construct() {
|
|
| 46 | - parent::__construct(); |
|
| 40 | + /** |
|
| 41 | + * Constructor |
|
| 42 | + * |
|
| 43 | + * @return void |
|
| 44 | + */ |
|
| 45 | + function __construct() {
|
|
| 46 | + parent::__construct(); |
|
| 47 | 47 | |
| 48 | - // Adding options to help archive: |
|
| 49 | - /** |
|
| 50 | - * We removed the "proc" option as it seemd not to be working any more. But as the complete handling of the crawler has changed regarding the configuration |
|
| 51 | - * this is completely ok. Since configuration records were introduced to configure "what should be done" additionally to page ts the way to setup jobs |
|
| 52 | - * has drifted from selecting filtering processing instructions to selecting/filtering configuration keys (you can configure the processing instructions there). |
|
| 53 | - * This is also reflected in the backend modules and allows you a much clearer and powerful way to work with the crawler extension. |
|
| 54 | - */ |
|
| 55 | - // $this->cli_options[] = array('-proc listOfProcInstr', 'Comma list of processing instructions. These are the "actions" carried out when crawling and you must specify at least one. Depends on third-party extensions. Examples are "tx_cachemgm_recache" from "cachemgm" extension (will recache pages), "tx_staticpub_publish" from "staticpub" (publishing pages to static files) or "tx_indexedsearch_reindex" from "indexed_search" (indexes pages).');
|
|
| 56 | - // TODO: cleanup here! |
|
| 57 | - $this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include.");
|
|
| 58 | - $this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!");
|
|
| 59 | - $this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"');
|
|
| 60 | - $this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations');
|
|
| 61 | - # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors");
|
|
| 48 | + // Adding options to help archive: |
|
| 49 | + /** |
|
| 50 | + * We removed the "proc" option as it seemd not to be working any more. But as the complete handling of the crawler has changed regarding the configuration |
|
| 51 | + * this is completely ok. Since configuration records were introduced to configure "what should be done" additionally to page ts the way to setup jobs |
|
| 52 | + * has drifted from selecting filtering processing instructions to selecting/filtering configuration keys (you can configure the processing instructions there). |
|
| 53 | + * This is also reflected in the backend modules and allows you a much clearer and powerful way to work with the crawler extension. |
|
| 54 | + */ |
|
| 55 | + // $this->cli_options[] = array('-proc listOfProcInstr', 'Comma list of processing instructions. These are the "actions" carried out when crawling and you must specify at least one. Depends on third-party extensions. Examples are "tx_cachemgm_recache" from "cachemgm" extension (will recache pages), "tx_staticpub_publish" from "staticpub" (publishing pages to static files) or "tx_indexedsearch_reindex" from "indexed_search" (indexes pages).');
|
|
| 56 | + // TODO: cleanup here! |
|
| 57 | + $this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include.");
|
|
| 58 | + $this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!");
|
|
| 59 | + $this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"');
|
|
| 60 | + $this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations');
|
|
| 61 | + # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors");
|
|
| 62 | 62 | |
| 63 | - // Setting help texts: |
|
| 64 | - $this->cli_help['name'] = 'crawler CLI interface -- Submitting URLs to be crawled via CLI interface.'; |
|
| 65 | - $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
| 66 | - $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; |
|
| 67 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; |
|
| 68 | - $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
| 69 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
| 70 | - } |
|
| 63 | + // Setting help texts: |
|
| 64 | + $this->cli_help['name'] = 'crawler CLI interface -- Submitting URLs to be crawled via CLI interface.'; |
|
| 65 | + $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
| 66 | + $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; |
|
| 67 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; |
|
| 68 | + $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
| 69 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
| 70 | + } |
|
| 71 | 71 | } |
| 72 | 72 | |
| 73 | 73 | if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']) {
|
| 74 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']); |
|
| 74 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']); |
|
| 75 | 75 | } |
| 76 | 76 | |
| 77 | 77 | ?> |
@@ -57,7 +57,7 @@ discard block |
||
| 57 | 57 | $this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include.");
|
| 58 | 58 | $this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!");
|
| 59 | 59 | $this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"');
|
| 60 | - $this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations');
|
|
| 60 | + $this->cli_options[] = array('-conf configurationkeys', 'List of Configuration Keys', 'A commaseperated list of crawler configurations');
|
|
| 61 | 61 | # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors");
|
| 62 | 62 | |
| 63 | 63 | // Setting help texts: |
@@ -65,7 +65,7 @@ discard block |
||
| 65 | 65 | $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
| 66 | 66 | $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; |
| 67 | 67 | $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; |
| 68 | - $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
| 68 | + $this->cli_help['examples'] .= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
| 69 | 69 | $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
| 70 | 70 | } |
| 71 | 71 | } |
@@ -5,20 +5,20 @@ discard block |
||
| 5 | 5 | * Retrieve path (taken from cli_dispatch.phpsh) |
| 6 | 6 | */ |
| 7 | 7 | |
| 8 | - // Get path to this script |
|
| 8 | + // Get path to this script |
|
| 9 | 9 | $tempPathThisScript = isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : (isset($_ENV['_']) ? $_ENV['_'] : $_SERVER['_']); |
| 10 | 10 | |
| 11 | - // Resolve path |
|
| 11 | + // Resolve path |
|
| 12 | 12 | if (!isAbsPath($tempPathThisScript)) { |
| 13 | - $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
|
| 14 | - if ($workingDirectory) { |
|
| 15 | - $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
| 16 | - if (!@is_file($tempPathThisScript)) { |
|
| 17 | - die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
| 18 | - } |
|
| 19 | - } else { |
|
| 20 | - die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
| 21 | - } |
|
| 13 | + $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
|
| 14 | + if ($workingDirectory) { |
|
| 15 | + $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
| 16 | + if (!@is_file($tempPathThisScript)) { |
|
| 17 | + die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
| 18 | + } |
|
| 19 | + } else { |
|
| 20 | + die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
| 21 | + } |
|
| 22 | 22 | } |
| 23 | 23 | |
| 24 | 24 | $typo3Root = preg_replace('#typo3conf/ext/crawler/cli/bootstrap.php$#', '', $tempPathThisScript); |
@@ -30,33 +30,33 @@ discard block |
||
| 30 | 30 | */ |
| 31 | 31 | $additionalHeaders = unserialize(base64_decode($_SERVER['argv'][3])); |
| 32 | 32 | if (is_array($additionalHeaders)) { |
| 33 | - foreach ($additionalHeaders as $additionalHeader) { |
|
| 34 | - if (strpos($additionalHeader, ':') !== FALSE) { |
|
| 35 | - list($key, $value) = explode(':', $additionalHeader, 2); |
|
| 36 | - $key = str_replace('-', '_', strtoupper(trim($key))); |
|
| 37 | - if ($key != 'HOST') { |
|
| 38 | - $_SERVER['HTTP_' . $key] = $value; |
|
| 39 | - } |
|
| 40 | - } |
|
| 41 | - } |
|
| 33 | + foreach ($additionalHeaders as $additionalHeader) { |
|
| 34 | + if (strpos($additionalHeader, ':') !== FALSE) { |
|
| 35 | + list($key, $value) = explode(':', $additionalHeader, 2); |
|
| 36 | + $key = str_replace('-', '_', strtoupper(trim($key))); |
|
| 37 | + if ($key != 'HOST') { |
|
| 38 | + $_SERVER['HTTP_' . $key] = $value; |
|
| 39 | + } |
|
| 40 | + } |
|
| 41 | + } |
|
| 42 | 42 | } |
| 43 | 43 | |
| 44 | 44 | |
| 45 | - // put parsed query parts into $_GET array |
|
| 45 | + // put parsed query parts into $_GET array |
|
| 46 | 46 | $urlParts = parse_url($_SERVER['argv'][2]); |
| 47 | - // Populating $_GET |
|
| 47 | + // Populating $_GET |
|
| 48 | 48 | parse_str($urlParts['query'], $_GET); |
| 49 | - // Populating $_REQUEST |
|
| 49 | + // Populating $_REQUEST |
|
| 50 | 50 | parse_str($urlParts['query'], $_REQUEST); |
| 51 | - // Populating $_POST |
|
| 51 | + // Populating $_POST |
|
| 52 | 52 | $_POST = array(); |
| 53 | - // Populating $_COOKIE |
|
| 53 | + // Populating $_COOKIE |
|
| 54 | 54 | $_COOKIE = array(); |
| 55 | 55 | |
| 56 | - // Get the TYPO3_SITE_PATH of the website frontend: |
|
| 56 | + // Get the TYPO3_SITE_PATH of the website frontend: |
|
| 57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; |
| 58 | 58 | |
| 59 | - // faking the environment |
|
| 59 | + // faking the environment |
|
| 60 | 60 | $_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); |
| 61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; |
| 62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; |
@@ -66,15 +66,15 @@ discard block |
||
| 66 | 66 | $_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); |
| 67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; |
| 68 | 68 | |
| 69 | - // Define a port if used in the URL: |
|
| 69 | + // Define a port if used in the URL: |
|
| 70 | 70 | if (isset($urlParts['port'])) { |
| 71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
| 72 | - $_SERVER['SERVER_PORT'] = $urlParts['port']; |
|
| 71 | + $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
| 72 | + $_SERVER['SERVER_PORT'] = $urlParts['port']; |
|
| 73 | 73 | } |
| 74 | 74 | |
| 75 | - // Define HTTPS disposal: |
|
| 75 | + // Define HTTPS disposal: |
|
| 76 | 76 | if ($urlParts['scheme'] === 'https') { |
| 77 | - $_SERVER['HTTPS'] = 'on'; |
|
| 77 | + $_SERVER['HTTPS'] = 'on'; |
|
| 78 | 78 | } |
| 79 | 79 | |
| 80 | 80 | chdir($typo3Root); |
@@ -88,11 +88,11 @@ discard block |
||
| 88 | 88 | * @return boolean |
| 89 | 89 | */ |
| 90 | 90 | function isAbsPath($path) { |
| 91 | - // on Windows also a path starting with a drive letter is absolute: X:/ |
|
| 92 | - if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { |
|
| 93 | - return TRUE; |
|
| 94 | - } |
|
| 91 | + // on Windows also a path starting with a drive letter is absolute: X:/ |
|
| 92 | + if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { |
|
| 93 | + return TRUE; |
|
| 94 | + } |
|
| 95 | 95 | |
| 96 | - // path starting with a / is always absolute, on every system |
|
| 97 | - return (substr($path, 0, 1) === '/'); |
|
| 96 | + // path starting with a / is always absolute, on every system |
|
| 97 | + return (substr($path, 0, 1) === '/'); |
|
| 98 | 98 | } |
@@ -12,12 +12,12 @@ discard block |
||
| 12 | 12 | if (!isAbsPath($tempPathThisScript)) { |
| 13 | 13 | $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
| 14 | 14 | if ($workingDirectory) { |
| 15 | - $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
| 15 | + $tempPathThisScript = $workingDirectory.'/'.preg_replace('/\.\//', '', $tempPathThisScript); |
|
| 16 | 16 | if (!@is_file($tempPathThisScript)) { |
| 17 | - die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
| 17 | + die('Relative path found, but an error occured during resolving of the absolute path: '.$tempPathThisScript.PHP_EOL); |
|
| 18 | 18 | } |
| 19 | 19 | } else { |
| 20 | - die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
| 20 | + die('Relative path found, but resolving absolute path is not supported on this platform.'.PHP_EOL); |
|
| 21 | 21 | } |
| 22 | 22 | } |
| 23 | 23 | |
@@ -35,7 +35,7 @@ discard block |
||
| 35 | 35 | list($key, $value) = explode(':', $additionalHeader, 2); |
| 36 | 36 | $key = str_replace('-', '_', strtoupper(trim($key))); |
| 37 | 37 | if ($key != 'HOST') { |
| 38 | - $_SERVER['HTTP_' . $key] = $value; |
|
| 38 | + $_SERVER['HTTP_'.$key] = $value; |
|
| 39 | 39 | } |
| 40 | 40 | } |
| 41 | 41 | } |
@@ -57,18 +57,18 @@ discard block |
||
| 57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; |
| 58 | 58 | |
| 59 | 59 | // faking the environment |
| 60 | -$_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); |
|
| 60 | +$_SERVER['DOCUMENT_ROOT'] = preg_replace('#'.preg_quote($typo3SitePath, '#').'$#', '', $typo3Root); |
|
| 61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; |
| 62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; |
| 63 | -$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath . 'index.php'; |
|
| 64 | -$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root . 'index.php'; |
|
| 63 | +$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath.'index.php'; |
|
| 64 | +$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root.'index.php'; |
|
| 65 | 65 | $_SERVER['QUERY_STRING'] = (isset($urlParts['query']) ? $urlParts['query'] : ''); |
| 66 | -$_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); |
|
| 66 | +$_SERVER['REQUEST_URI'] = $urlParts['path'].(isset($urlParts['query']) ? '?'.$urlParts['query'] : ''); |
|
| 67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; |
| 68 | 68 | |
| 69 | 69 | // Define a port if used in the URL: |
| 70 | 70 | if (isset($urlParts['port'])) { |
| 71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
| 71 | + $_SERVER['HTTP_HOST'] .= ':'.$urlParts['port']; |
|
| 72 | 72 | $_SERVER['SERVER_PORT'] = $urlParts['port']; |
| 73 | 73 | } |
| 74 | 74 | |
@@ -78,7 +78,7 @@ discard block |
||
| 78 | 78 | } |
| 79 | 79 | |
| 80 | 80 | chdir($typo3Root); |
| 81 | -include($typo3Root . '/index.php'); |
|
| 81 | +include($typo3Root.'/index.php'); |
|
| 82 | 82 | |
| 83 | 83 | |
| 84 | 84 | /** |
@@ -37,30 +37,30 @@ |
||
| 37 | 37 | */ |
| 38 | 38 | class tx_crawler_cli_flush extends \TYPO3\CMS\Core\Controller\CommandLineController { |
| 39 | 39 | |
| 40 | - /** |
|
| 41 | - * Constructor |
|
| 42 | - * |
|
| 43 | - * @return void |
|
| 44 | - */ |
|
| 45 | - function __construct() { |
|
| 46 | - parent::__construct(); |
|
| 40 | + /** |
|
| 41 | + * Constructor |
|
| 42 | + * |
|
| 43 | + * @return void |
|
| 44 | + */ |
|
| 45 | + function __construct() { |
|
| 46 | + parent::__construct(); |
|
| 47 | 47 | |
| 48 | - // Adding options to help archive: |
|
| 49 | - $this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); |
|
| 50 | - # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
| 48 | + // Adding options to help archive: |
|
| 49 | + $this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); |
|
| 50 | + # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
| 51 | 51 | |
| 52 | - // Setting help texts: |
|
| 53 | - $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; |
|
| 54 | - $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
| 55 | - $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; |
|
| 56 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; |
|
| 57 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; |
|
| 58 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
| 59 | - } |
|
| 52 | + // Setting help texts: |
|
| 53 | + $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; |
|
| 54 | + $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
| 55 | + $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; |
|
| 56 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; |
|
| 57 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; |
|
| 58 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
| 59 | + } |
|
| 60 | 60 | } |
| 61 | 61 | |
| 62 | 62 | if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']) { |
| 63 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); |
|
| 63 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); |
|
| 64 | 64 | } |
| 65 | 65 | |
| 66 | 66 | ?> |
@@ -1149,7 +1149,7 @@ discard block |
||
| 1149 | 1149 | return time(); |
| 1150 | 1150 | } |
| 1151 | 1151 | |
| 1152 | - /************************************ |
|
| 1152 | + /************************************ |
|
| 1153 | 1153 | * |
| 1154 | 1154 | * URL reading |
| 1155 | 1155 | * |
@@ -1316,7 +1316,7 @@ discard block |
||
| 1316 | 1316 | return FALSE; |
| 1317 | 1317 | } |
| 1318 | 1318 | |
| 1319 | - // direct request |
|
| 1319 | + // direct request |
|
| 1320 | 1320 | if ($this->extensionSettings['makeDirectRequests']) { |
| 1321 | 1321 | $result = $this->sendDirectRequest($originalUrl, $crawlerId); |
| 1322 | 1322 | return $result; |
@@ -2328,7 +2328,7 @@ discard block |
||
| 2328 | 2328 | * |
| 2329 | 2329 | * @return void |
| 2330 | 2330 | */ |
| 2331 | - public function CLI_deleteProcessesMarkedDeleted() { |
|
| 2331 | + public function CLI_deleteProcessesMarkedDeleted() { |
|
| 2332 | 2332 | $this->db->exec_DELETEquery('tx_crawler_process', 'deleted = 1'); |
| 2333 | 2333 | } |
| 2334 | 2334 | |
@@ -29,8 +29,8 @@ discard block |
||
| 29 | 29 | class tx_crawler_lib { |
| 30 | 30 | |
| 31 | 31 | var $setID = 0; |
| 32 | - var $processID =''; |
|
| 33 | - var $max_CLI_exec_time = 3600; // One hour is max stalled time for the CLI (If the process has had the status "start" for 3600 seconds it will be regarded stalled and a new process is started. |
|
| 32 | + var $processID = ''; |
|
| 33 | + var $max_CLI_exec_time = 3600; // One hour is max stalled time for the CLI (If the process has had the status "start" for 3600 seconds it will be regarded stalled and a new process is started. |
|
| 34 | 34 | |
| 35 | 35 | var $duplicateTrack = array(); |
| 36 | 36 | var $downloadUrls = array(); |
@@ -43,9 +43,9 @@ discard block |
||
| 43 | 43 | var $queueEntries = array(); |
| 44 | 44 | var $urlList = array(); |
| 45 | 45 | |
| 46 | - var $debugMode=FALSE; |
|
| 46 | + var $debugMode = FALSE; |
|
| 47 | 47 | |
| 48 | - var $extensionSettings=array(); |
|
| 48 | + var $extensionSettings = array(); |
|
| 49 | 49 | |
| 50 | 50 | var $MP = false; // mount point |
| 51 | 51 | |
@@ -69,9 +69,9 @@ discard block |
||
| 69 | 69 | private $backendUser; |
| 70 | 70 | |
| 71 | 71 | const CLI_STATUS_NOTHING_PROCCESSED = 0; |
| 72 | - const CLI_STATUS_REMAIN = 1; //queue not empty |
|
| 73 | - const CLI_STATUS_PROCESSED = 2; //(some) queue items where processed |
|
| 74 | - const CLI_STATUS_ABORTED = 4; //instance didn't finish |
|
| 72 | + const CLI_STATUS_REMAIN = 1; //queue not empty |
|
| 73 | + const CLI_STATUS_PROCESSED = 2; //(some) queue items where processed |
|
| 74 | + const CLI_STATUS_ABORTED = 4; //instance didn't finish |
|
| 75 | 75 | const CLI_STATUS_POLLABLE_PROCESSED = 8; |
| 76 | 76 | |
| 77 | 77 | /** |
@@ -162,7 +162,7 @@ discard block |
||
| 162 | 162 | $this->extensionSettings['countInARun'] = 100; |
| 163 | 163 | } |
| 164 | 164 | |
| 165 | - $this->extensionSettings['processLimit'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['processLimit'],1,99,1); |
|
| 165 | + $this->extensionSettings['processLimit'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['processLimit'], 1, 99, 1); |
|
| 166 | 166 | } |
| 167 | 167 | |
| 168 | 168 | /** |
@@ -195,7 +195,7 @@ discard block |
||
| 195 | 195 | } |
| 196 | 196 | |
| 197 | 197 | if (!$skipPage) { |
| 198 | - if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList('3,4', $pageRow['doktype']) || $pageRow['doktype']>=199) { |
|
| 198 | + if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList('3,4', $pageRow['doktype']) || $pageRow['doktype'] >= 199) { |
|
| 199 | 199 | $skipPage = true; |
| 200 | 200 | $skipMessage = 'Because doktype is not allowed'; |
| 201 | 201 | } |
@@ -216,13 +216,13 @@ discard block |
||
| 216 | 216 | if (!$skipPage) { |
| 217 | 217 | // veto hook |
| 218 | 218 | if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'])) { |
| 219 | - foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'] as $key => $func) { |
|
| 219 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['pageVeto'] as $key => $func) { |
|
| 220 | 220 | $params = array( |
| 221 | 221 | 'pageRow' => $pageRow |
| 222 | 222 | ); |
| 223 | 223 | // expects "false" if page is ok and "true" or a skipMessage if this page should _not_ be crawled |
| 224 | 224 | $veto = \TYPO3\CMS\Core\Utility\GeneralUtility::callUserFunction($func, $params, $this); |
| 225 | - if ($veto !== false) { |
|
| 225 | + if ($veto !== false) { |
|
| 226 | 226 | $skipPage = true; |
| 227 | 227 | if (is_string($veto)) { |
| 228 | 228 | $skipMessage = $veto; |
@@ -271,9 +271,9 @@ discard block |
||
| 271 | 271 | * @param string $configurationHash |
| 272 | 272 | * @return boolean |
| 273 | 273 | */ |
| 274 | - protected function noUnprocessedQueueEntriesForPageWithConfigurationHashExist($uid,$configurationHash) { |
|
| 275 | - $configurationHash = $this->db->fullQuoteStr($configurationHash,'tx_crawler_queue'); |
|
| 276 | - $res = $this->db->exec_SELECTquery('count(*) as anz','tx_crawler_queue',"page_id=".intval($uid)." AND configuration_hash=".$configurationHash." AND exec_time=0"); |
|
| 274 | + protected function noUnprocessedQueueEntriesForPageWithConfigurationHashExist($uid, $configurationHash) { |
|
| 275 | + $configurationHash = $this->db->fullQuoteStr($configurationHash, 'tx_crawler_queue'); |
|
| 276 | + $res = $this->db->exec_SELECTquery('count(*) as anz', 'tx_crawler_queue', "page_id=".intval($uid)." AND configuration_hash=".$configurationHash." AND exec_time=0"); |
|
| 277 | 277 | $row = $this->db->sql_fetch_assoc($res); |
| 278 | 278 | |
| 279 | 279 | return ($row['anz'] == 0); |
@@ -338,26 +338,26 @@ discard block |
||
| 338 | 338 | } |
| 339 | 339 | } |
| 340 | 340 | |
| 341 | - if (is_array($vv['URLs'])) { |
|
| 342 | - $configurationHash = md5(serialize($vv)); |
|
| 343 | - $skipInnerCheck = $this->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageRow['uid'],$configurationHash); |
|
| 341 | + if (is_array($vv['URLs'])) { |
|
| 342 | + $configurationHash = md5(serialize($vv)); |
|
| 343 | + $skipInnerCheck = $this->noUnprocessedQueueEntriesForPageWithConfigurationHashExist($pageRow['uid'], $configurationHash); |
|
| 344 | 344 | |
| 345 | - foreach($vv['URLs'] as $urlQuery) { |
|
| 345 | + foreach ($vv['URLs'] as $urlQuery) { |
|
| 346 | 346 | |
| 347 | - if ($this->drawURLs_PIfilter($vv['subCfg']['procInstrFilter'], $incomingProcInstructions)) { |
|
| 347 | + if ($this->drawURLs_PIfilter($vv['subCfg']['procInstrFilter'], $incomingProcInstructions)) { |
|
| 348 | 348 | |
| 349 | 349 | // Calculate cHash: |
| 350 | - if ($vv['subCfg']['cHash']) { |
|
| 350 | + if ($vv['subCfg']['cHash']) { |
|
| 351 | 351 | /* @var $cacheHash \TYPO3\CMS\Frontend\Page\CacheHashCalculator */ |
| 352 | 352 | $cacheHash = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Frontend\Page\CacheHashCalculator'); |
| 353 | - $urlQuery .= '&cHash=' . $cacheHash->generateForParameters($urlQuery); |
|
| 353 | + $urlQuery .= '&cHash='.$cacheHash->generateForParameters($urlQuery); |
|
| 354 | 354 | } |
| 355 | 355 | |
| 356 | 356 | // Create key by which to determine unique-ness: |
| 357 | 357 | $uKey = $urlQuery.'|'.$vv['subCfg']['userGroups'].'|'.$vv['subCfg']['baseUrl'].'|'.$vv['subCfg']['procInstrFilter']; |
| 358 | 358 | |
| 359 | 359 | // realurl support (thanks to Ingo Renner) |
| 360 | - $urlQuery = 'index.php' . $urlQuery; |
|
| 360 | + $urlQuery = 'index.php'.$urlQuery; |
|
| 361 | 361 | if (\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded('realurl') && $vv['subCfg']['realurl']) { |
| 362 | 362 | $params = array( |
| 363 | 363 | 'LD' => array( |
@@ -370,8 +370,8 @@ discard block |
||
| 370 | 370 | } |
| 371 | 371 | |
| 372 | 372 | // Scheduled time: |
| 373 | - $schTime = $scheduledTime + round(count($duplicateTrack)*(60/$reqMinute)); |
|
| 374 | - $schTime = floor($schTime/60)*60; |
|
| 373 | + $schTime = $scheduledTime + round(count($duplicateTrack) * (60 / $reqMinute)); |
|
| 374 | + $schTime = floor($schTime / 60) * 60; |
|
| 375 | 375 | |
| 376 | 376 | if (isset($duplicateTrack[$uKey])) { |
| 377 | 377 | |
@@ -383,10 +383,10 @@ discard block |
||
| 383 | 383 | $urlList = '['.date('d.m.y H:i', $schTime).'] '.htmlspecialchars($urlQuery); |
| 384 | 384 | $this->urlList[] = '['.date('d.m.y H:i', $schTime).'] '.$urlQuery; |
| 385 | 385 | |
| 386 | - $theUrl = ($vv['subCfg']['baseUrl'] ? $vv['subCfg']['baseUrl'] : \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_URL')) . $urlQuery; |
|
| 386 | + $theUrl = ($vv['subCfg']['baseUrl'] ? $vv['subCfg']['baseUrl'] : \TYPO3\CMS\Core\Utility\GeneralUtility::getIndpEnv('TYPO3_SITE_URL')).$urlQuery; |
|
| 387 | 387 | |
| 388 | 388 | // Submit for crawling! |
| 389 | - if ($submitCrawlUrls) { |
|
| 389 | + if ($submitCrawlUrls) { |
|
| 390 | 390 | $added = $this->addUrl( |
| 391 | 391 | $pageRow['uid'], |
| 392 | 392 | $theUrl, |
@@ -398,7 +398,7 @@ discard block |
||
| 398 | 398 | if ($added === false) { |
| 399 | 399 | $urlList .= ' (Url already existed)'; |
| 400 | 400 | } |
| 401 | - } elseif ($downloadCrawlUrls) { |
|
| 401 | + } elseif ($downloadCrawlUrls) { |
|
| 402 | 402 | $downloadUrls[$theUrl] = $theUrl; |
| 403 | 403 | } |
| 404 | 404 | |
@@ -427,7 +427,7 @@ discard block |
||
| 427 | 427 | return TRUE; |
| 428 | 428 | } |
| 429 | 429 | |
| 430 | - foreach($incomingProcInstructions as $pi) { |
|
| 430 | + foreach ($incomingProcInstructions as $pi) { |
|
| 431 | 431 | if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList($piString, $pi)) { |
| 432 | 432 | return TRUE; |
| 433 | 433 | } |
@@ -440,7 +440,7 @@ discard block |
||
| 440 | 440 | * @return array |
| 441 | 441 | */ |
| 442 | 442 | public function getPageTSconfigForId($id) { |
| 443 | - if(!$this->MP){ |
|
| 443 | + if (!$this->MP) { |
|
| 444 | 444 | $pageTSconfig = \TYPO3\CMS\Backend\Utility\BackendUtility::getPagesTSconfig($id); |
| 445 | 445 | } else { |
| 446 | 446 | list(,$mountPointId) = explode('-', $this->MP); |
@@ -468,7 +468,7 @@ discard block |
||
| 468 | 468 | * @param integer $id Page ID |
| 469 | 469 | * @return array Configurations from pages and configuration records |
| 470 | 470 | */ |
| 471 | - protected function getUrlsForPageId($id) { |
|
| 471 | + protected function getUrlsForPageId($id) { |
|
| 472 | 472 | |
| 473 | 473 | /** |
| 474 | 474 | * Get configuration from tsConfig |
@@ -479,24 +479,24 @@ discard block |
||
| 479 | 479 | |
| 480 | 480 | $res = array(); |
| 481 | 481 | |
| 482 | - if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.'])) { |
|
| 482 | + if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.'])) { |
|
| 483 | 483 | $crawlerCfg = $pageTSconfig['tx_crawler.']['crawlerCfg.']; |
| 484 | 484 | |
| 485 | - if (is_array($crawlerCfg['paramSets.'])) { |
|
| 486 | - foreach($crawlerCfg['paramSets.'] as $key => $values) { |
|
| 487 | - if (!is_array($values)) { |
|
| 485 | + if (is_array($crawlerCfg['paramSets.'])) { |
|
| 486 | + foreach ($crawlerCfg['paramSets.'] as $key => $values) { |
|
| 487 | + if (!is_array($values)) { |
|
| 488 | 488 | |
| 489 | 489 | // Sub configuration for a single configuration string: |
| 490 | - $subCfg = (array)$crawlerCfg['paramSets.'][$key.'.']; |
|
| 490 | + $subCfg = (array) $crawlerCfg['paramSets.'][$key.'.']; |
|
| 491 | 491 | $subCfg['key'] = $key; |
| 492 | 492 | |
| 493 | - if (strcmp($subCfg['procInstrFilter'],'')) { |
|
| 494 | - $subCfg['procInstrFilter'] = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['procInstrFilter'])); |
|
| 493 | + if (strcmp($subCfg['procInstrFilter'], '')) { |
|
| 494 | + $subCfg['procInstrFilter'] = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['procInstrFilter'])); |
|
| 495 | 495 | } |
| 496 | - $pidOnlyList = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['pidsOnly'],1)); |
|
| 496 | + $pidOnlyList = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['pidsOnly'], 1)); |
|
| 497 | 497 | |
| 498 | 498 | // process configuration if it is not page-specific or if the specific page is the current page: |
| 499 | - if (!strcmp($subCfg['pidsOnly'],'') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList,$id)) { |
|
| 499 | + if (!strcmp($subCfg['pidsOnly'], '') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList, $id)) { |
|
| 500 | 500 | |
| 501 | 501 | // add trailing slash if not present |
| 502 | 502 | if (!empty($subCfg['baseUrl']) && substr($subCfg['baseUrl'], -1) != '/') { |
@@ -507,14 +507,14 @@ discard block |
||
| 507 | 507 | $res[$key] = array(); |
| 508 | 508 | $res[$key]['subCfg'] = $subCfg; |
| 509 | 509 | $res[$key]['paramParsed'] = $this->parseParams($values); |
| 510 | - $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'],$id); |
|
| 510 | + $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id); |
|
| 511 | 511 | $res[$key]['origin'] = 'pagets'; |
| 512 | 512 | |
| 513 | 513 | // recognize MP value |
| 514 | - if(!$this->MP){ |
|
| 515 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'],array('?id='.$id)); |
|
| 514 | + if (!$this->MP) { |
|
| 515 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id)); |
|
| 516 | 516 | } else { |
| 517 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'],array('?id='.$id.'&MP='.$this->MP)); |
|
| 517 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id.'&MP='.$this->MP)); |
|
| 518 | 518 | } |
| 519 | 519 | } |
| 520 | 520 | } |
@@ -535,7 +535,7 @@ discard block |
||
| 535 | 535 | 'tx_crawler_configuration', |
| 536 | 536 | 'pid', |
| 537 | 537 | intval($page['uid']), |
| 538 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration') . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration') |
|
| 538 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration').\TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration') |
|
| 539 | 539 | ); |
| 540 | 540 | |
| 541 | 541 | if (is_array($configurationRecordsForCurrentPage)) { |
@@ -544,10 +544,10 @@ discard block |
||
| 544 | 544 | // check access to the configuration record |
| 545 | 545 | if (empty($configurationRecord['begroups']) || $GLOBALS['BE_USER']->isAdmin() || $this->hasGroupAccess($GLOBALS['BE_USER']->user['usergroup_cached_list'], $configurationRecord['begroups'])) { |
| 546 | 546 | |
| 547 | - $pidOnlyList = implode(',',\TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$configurationRecord['pidsonly'],1)); |
|
| 547 | + $pidOnlyList = implode(',', \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $configurationRecord['pidsonly'], 1)); |
|
| 548 | 548 | |
| 549 | 549 | // process configuration if it is not page-specific or if the specific page is the current page: |
| 550 | - if (!strcmp($configurationRecord['pidsonly'],'') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList,$id)) { |
|
| 550 | + if (!strcmp($configurationRecord['pidsonly'], '') || \TYPO3\CMS\Core\Utility\GeneralUtility::inList($pidOnlyList, $id)) { |
|
| 551 | 551 | $key = $configurationRecord['name']; |
| 552 | 552 | |
| 553 | 553 | // don't overwrite previously defined paramSets |
@@ -577,7 +577,7 @@ discard block |
||
| 577 | 577 | $res[$key]['subCfg'] = $subCfg; |
| 578 | 578 | $res[$key]['paramParsed'] = $this->parseParams($configurationRecord['configuration']); |
| 579 | 579 | $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $id); |
| 580 | - $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id=' . $id)); |
|
| 580 | + $res[$key]['URLs'] = $this->compileUrls($res[$key]['paramExpanded'], array('?id='.$id)); |
|
| 581 | 581 | $res[$key]['origin'] = 'tx_crawler_configuration_'.$configurationRecord['uid']; |
| 582 | 582 | } |
| 583 | 583 | } |
@@ -587,8 +587,8 @@ discard block |
||
| 587 | 587 | } |
| 588 | 588 | } |
| 589 | 589 | |
| 590 | - if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'])) { |
|
| 591 | - foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] as $func) { |
|
| 590 | + if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'])) { |
|
| 591 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['crawler']['processUrls'] as $func) { |
|
| 592 | 592 | $params = array( |
| 593 | 593 | 'res' => &$res, |
| 594 | 594 | ); |
@@ -613,8 +613,8 @@ discard block |
||
| 613 | 613 | $res = $this->db->exec_SELECTquery( |
| 614 | 614 | '*', |
| 615 | 615 | 'sys_domain', |
| 616 | - 'uid = '.$sysDomainUid . |
|
| 617 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('sys_domain') . |
|
| 616 | + 'uid = '.$sysDomainUid. |
|
| 617 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('sys_domain'). |
|
| 618 | 618 | \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('sys_domain') |
| 619 | 619 | ); |
| 620 | 620 | $row = $this->db->sql_fetch_assoc($res); |
@@ -638,24 +638,24 @@ discard block |
||
| 638 | 638 | $pageTSconfig = $this->getPageTSconfigForId($rootId); |
| 639 | 639 | if (is_array($pageTSconfig) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']) && is_array($pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.'])) { |
| 640 | 640 | $sets = $pageTSconfig['tx_crawler.']['crawlerCfg.']['paramSets.']; |
| 641 | - if(is_array($sets)) { |
|
| 642 | - foreach($sets as $key=>$value) { |
|
| 643 | - if(!is_array($value)) continue; |
|
| 644 | - $configurationsForBranch[] = substr($key,-1)=='.'?substr($key,0,-1):$key; |
|
| 641 | + if (is_array($sets)) { |
|
| 642 | + foreach ($sets as $key=>$value) { |
|
| 643 | + if (!is_array($value)) continue; |
|
| 644 | + $configurationsForBranch[] = substr($key, -1) == '.' ?substr($key, 0, -1) : $key; |
|
| 645 | 645 | } |
| 646 | 646 | } |
| 647 | 647 | } |
| 648 | 648 | $pids = array(); |
| 649 | 649 | $rootLine = \TYPO3\CMS\Backend\Utility\BackendUtility::BEgetRootLine($rootId); |
| 650 | - foreach($rootLine as $node) { |
|
| 650 | + foreach ($rootLine as $node) { |
|
| 651 | 651 | $pids[] = $node['uid']; |
| 652 | 652 | } |
| 653 | 653 | /* @var \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
| 654 | 654 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
| 655 | 655 | $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1); |
| 656 | - $tree->init('AND ' . $perms_clause); |
|
| 656 | + $tree->init('AND '.$perms_clause); |
|
| 657 | 657 | $tree->getTree($rootId, $depth, ''); |
| 658 | - foreach($tree->tree as $node) { |
|
| 658 | + foreach ($tree->tree as $node) { |
|
| 659 | 659 | $pids[] = $node['row']['uid']; |
| 660 | 660 | } |
| 661 | 661 | |
@@ -663,12 +663,12 @@ discard block |
||
| 663 | 663 | '*', |
| 664 | 664 | 'tx_crawler_configuration', |
| 665 | 665 | 'pid IN ('.implode(',', $pids).') '. |
| 666 | - \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration') . |
|
| 666 | + \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_crawler_configuration'). |
|
| 667 | 667 | \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_crawler_configuration').' '. |
| 668 | 668 | \TYPO3\CMS\Backend\Utility\BackendUtility::versioningPlaceholderClause('tx_crawler_configuration').' ' |
| 669 | 669 | ); |
| 670 | 670 | |
| 671 | - while($row = $this->db->sql_fetch_assoc($res)) { |
|
| 671 | + while ($row = $this->db->sql_fetch_assoc($res)) { |
|
| 672 | 672 | $configurationsForBranch[] = $row['name']; |
| 673 | 673 | } |
| 674 | 674 | $this->db->sql_free_result($res); |
@@ -690,7 +690,7 @@ discard block |
||
| 690 | 690 | if (empty($accessList)) { |
| 691 | 691 | return true; |
| 692 | 692 | } |
| 693 | - foreach(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $groupList) as $groupUid) { |
|
| 693 | + foreach (\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $groupList) as $groupUid) { |
|
| 694 | 694 | if (\TYPO3\CMS\Core\Utility\GeneralUtility::inList($accessList, $groupUid)) { |
| 695 | 695 | return true; |
| 696 | 696 | } |
@@ -709,9 +709,9 @@ discard block |
||
| 709 | 709 | $paramKeyValues = array(); |
| 710 | 710 | $GETparams = explode('&', $inputQuery); |
| 711 | 711 | |
| 712 | - foreach($GETparams as $paramAndValue) { |
|
| 713 | - list($p,$v) = explode('=', $paramAndValue, 2); |
|
| 714 | - if (strlen($p)) { |
|
| 712 | + foreach ($GETparams as $paramAndValue) { |
|
| 713 | + list($p, $v) = explode('=', $paramAndValue, 2); |
|
| 714 | + if (strlen($p)) { |
|
| 715 | 715 | $paramKeyValues[rawurldecode($p)] = rawurldecode($v); |
| 716 | 716 | } |
| 717 | 717 | } |
@@ -734,84 +734,84 @@ discard block |
||
| 734 | 734 | * @param integer Current page ID |
| 735 | 735 | * @return array Array with key (GET var name) with the value being an array of all possible values for that key. |
| 736 | 736 | */ |
| 737 | - protected function expandParameters($paramArray, $pid) { |
|
| 737 | + protected function expandParameters($paramArray, $pid) { |
|
| 738 | 738 | global $TCA; |
| 739 | 739 | |
| 740 | 740 | // Traverse parameter names: |
| 741 | - foreach($paramArray as $p => $v) { |
|
| 741 | + foreach ($paramArray as $p => $v) { |
|
| 742 | 742 | $v = trim($v); |
| 743 | 743 | |
| 744 | 744 | // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal |
| 745 | - if (substr($v,0,1)==='[' && substr($v,-1)===']') { |
|
| 745 | + if (substr($v, 0, 1) === '[' && substr($v, -1) === ']') { |
|
| 746 | 746 | // So, find the value inside brackets and reset the paramArray value as an array. |
| 747 | - $v = substr($v,1,-1); |
|
| 747 | + $v = substr($v, 1, -1); |
|
| 748 | 748 | $paramArray[$p] = array(); |
| 749 | 749 | |
| 750 | 750 | // Explode parts and traverse them: |
| 751 | - $parts = explode('|',$v); |
|
| 752 | - foreach($parts as $pV) { |
|
| 751 | + $parts = explode('|', $v); |
|
| 752 | + foreach ($parts as $pV) { |
|
| 753 | 753 | |
| 754 | 754 | // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30) |
| 755 | - if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/',trim($pV),$reg)) { // Integer range: |
|
| 755 | + if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', trim($pV), $reg)) { // Integer range: |
|
| 756 | 756 | |
| 757 | 757 | // Swap if first is larger than last: |
| 758 | - if ($reg[1] > $reg[2]) { |
|
| 758 | + if ($reg[1] > $reg[2]) { |
|
| 759 | 759 | $temp = $reg[2]; |
| 760 | 760 | $reg[2] = $reg[1]; |
| 761 | 761 | $reg[1] = $temp; |
| 762 | 762 | } |
| 763 | 763 | |
| 764 | 764 | // Traverse range, add values: |
| 765 | - $runAwayBrake = 1000; // Limit to size of range! |
|
| 766 | - for($a=$reg[1]; $a<=$reg[2];$a++) { |
|
| 765 | + $runAwayBrake = 1000; // Limit to size of range! |
|
| 766 | + for ($a = $reg[1]; $a <= $reg[2]; $a++) { |
|
| 767 | 767 | $paramArray[$p][] = $a; |
| 768 | 768 | $runAwayBrake--; |
| 769 | - if ($runAwayBrake<=0) { |
|
| 769 | + if ($runAwayBrake <= 0) { |
|
| 770 | 770 | break; |
| 771 | 771 | } |
| 772 | 772 | } |
| 773 | - } elseif (substr(trim($pV),0,7)=='_TABLE:') { |
|
| 773 | + } elseif (substr(trim($pV), 0, 7) == '_TABLE:') { |
|
| 774 | 774 | |
| 775 | 775 | // Parse parameters: |
| 776 | - $subparts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(';',$pV); |
|
| 776 | + $subparts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(';', $pV); |
|
| 777 | 777 | $subpartParams = array(); |
| 778 | - foreach($subparts as $spV) { |
|
| 779 | - list($pKey,$pVal) = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(':',$spV); |
|
| 778 | + foreach ($subparts as $spV) { |
|
| 779 | + list($pKey, $pVal) = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(':', $spV); |
|
| 780 | 780 | $subpartParams[$pKey] = $pVal; |
| 781 | 781 | } |
| 782 | 782 | |
| 783 | 783 | // Table exists: |
| 784 | - if (isset($TCA[$subpartParams['_TABLE']])) { |
|
| 784 | + if (isset($TCA[$subpartParams['_TABLE']])) { |
|
| 785 | 785 | $lookUpPid = isset($subpartParams['_PID']) ? intval($subpartParams['_PID']) : $pid; |
| 786 | 786 | $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid'; |
| 787 | 787 | $where = isset($subpartParams['_WHERE']) ? $subpartParams['_WHERE'] : ''; |
| 788 | 788 | $addTable = isset($subpartParams['_ADDTABLE']) ? $subpartParams['_ADDTABLE'] : ''; |
| 789 | 789 | |
| 790 | 790 | $fieldName = $subpartParams['_FIELD'] ? $subpartParams['_FIELD'] : 'uid'; |
| 791 | - if ($fieldName==='uid' || $TCA[$subpartParams['_TABLE']]['columns'][$fieldName]) { |
|
| 791 | + if ($fieldName === 'uid' || $TCA[$subpartParams['_TABLE']]['columns'][$fieldName]) { |
|
| 792 | 792 | |
| 793 | 793 | $andWhereLanguage = ''; |
| 794 | 794 | $transOrigPointerField = $TCA[$subpartParams['_TABLE']]['ctrl']['transOrigPointerField']; |
| 795 | 795 | |
| 796 | 796 | if ($subpartParams['_ENABLELANG'] && $transOrigPointerField) { |
| 797 | - $andWhereLanguage = ' AND ' . $this->db->quoteStr($transOrigPointerField, $subpartParams['_TABLE']) .' <= 0 '; |
|
| 797 | + $andWhereLanguage = ' AND '.$this->db->quoteStr($transOrigPointerField, $subpartParams['_TABLE']).' <= 0 '; |
|
| 798 | 798 | } |
| 799 | 799 | |
| 800 | - $where = $this->db->quoteStr($pidField, $subpartParams['_TABLE']) .'='.intval($lookUpPid) . ' ' . |
|
| 801 | - $andWhereLanguage . $where; |
|
| 800 | + $where = $this->db->quoteStr($pidField, $subpartParams['_TABLE']).'='.intval($lookUpPid).' '. |
|
| 801 | + $andWhereLanguage.$where; |
|
| 802 | 802 | |
| 803 | 803 | $rows = $this->db->exec_SELECTgetRows( |
| 804 | 804 | $fieldName, |
| 805 | - $subpartParams['_TABLE'] . $addTable, |
|
| 806 | - $where . \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($subpartParams['_TABLE']), |
|
| 805 | + $subpartParams['_TABLE'].$addTable, |
|
| 806 | + $where.\TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($subpartParams['_TABLE']), |
|
| 807 | 807 | '', |
| 808 | 808 | '', |
| 809 | 809 | '', |
| 810 | 810 | $fieldName |
| 811 | 811 | ); |
| 812 | 812 | |
| 813 | - if (is_array($rows)) { |
|
| 814 | - $paramArray[$p] = array_merge($paramArray[$p],array_keys($rows)); |
|
| 813 | + if (is_array($rows)) { |
|
| 814 | + $paramArray[$p] = array_merge($paramArray[$p], array_keys($rows)); |
|
| 815 | 815 | } |
| 816 | 816 | } |
| 817 | 817 | } |
@@ -827,7 +827,7 @@ discard block |
||
| 827 | 827 | 'currentValue' => $pV, |
| 828 | 828 | 'pid' => $pid |
| 829 | 829 | ); |
| 830 | - foreach($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] as $key => $_funcRef) { |
|
| 830 | + foreach ($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] as $key => $_funcRef) { |
|
| 831 | 831 | \TYPO3\CMS\Core\Utility\GeneralUtility::callUserFunction($_funcRef, $_params, $this); |
| 832 | 832 | } |
| 833 | 833 | } |
@@ -863,11 +863,11 @@ discard block |
||
| 863 | 863 | |
| 864 | 864 | // Traverse value set: |
| 865 | 865 | $newUrls = array(); |
| 866 | - foreach($urls as $url) { |
|
| 867 | - foreach($valueSet as $val) { |
|
| 868 | - $newUrls[] = $url.(strcmp($val,'') ? '&'.rawurlencode($varName).'='.rawurlencode($val) : ''); |
|
| 866 | + foreach ($urls as $url) { |
|
| 867 | + foreach ($valueSet as $val) { |
|
| 868 | + $newUrls[] = $url.(strcmp($val, '') ? '&'.rawurlencode($varName).'='.rawurlencode($val) : ''); |
|
| 869 | 869 | |
| 870 | - if (count($newUrls) > \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000)) { |
|
| 870 | + if (count($newUrls) > \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($this->extensionSettings['maxCompileUrls'], 1, 1000000000, 10000)) { |
|
| 871 | 871 | break; |
| 872 | 872 | } |
| 873 | 873 | } |
@@ -897,7 +897,7 @@ discard block |
||
| 897 | 897 | */ |
| 898 | 898 | public function getLogEntriesForPageId($id, $filter = '', $doFlush = FALSE, $doFullFlush = FALSE, $itemsPerPage = 10) { |
| 899 | 899 | // FIXME: Write Unit tests for Filters |
| 900 | - switch($filter) { |
|
| 900 | + switch ($filter) { |
|
| 901 | 901 | case 'pending': |
| 902 | 902 | $addWhere = ' AND exec_time=0'; |
| 903 | 903 | break; |
@@ -911,13 +911,13 @@ discard block |
||
| 911 | 911 | |
| 912 | 912 | // FIXME: Write unit test that ensures that the right records are deleted. |
| 913 | 913 | if ($doFlush) { |
| 914 | - $this->flushQueue( ($doFullFlush?'1=1':('page_id='.intval($id))) .$addWhere); |
|
| 914 | + $this->flushQueue(($doFullFlush ? '1=1' : ('page_id='.intval($id))).$addWhere); |
|
| 915 | 915 | return array(); |
| 916 | 916 | } else { |
| 917 | 917 | return $this->db->exec_SELECTgetRows('*', |
| 918 | 918 | 'tx_crawler_queue', |
| 919 | - 'page_id=' . intval($id) . $addWhere, '', 'scheduled DESC', |
|
| 920 | - (intval($itemsPerPage)>0 ? intval($itemsPerPage) : '')); |
|
| 919 | + 'page_id='.intval($id).$addWhere, '', 'scheduled DESC', |
|
| 920 | + (intval($itemsPerPage) > 0 ? intval($itemsPerPage) : '')); |
|
| 921 | 921 | } |
| 922 | 922 | } |
| 923 | 923 | |
@@ -930,9 +930,9 @@ discard block |
||
| 930 | 930 | * @param integer Limit the amount of entries per page, default is 10 |
| 931 | 931 | * @return array |
| 932 | 932 | */ |
| 933 | - public function getLogEntriesForSetId($set_id,$filter='',$doFlush=FALSE, $doFullFlush=FALSE, $itemsPerPage=10) { |
|
| 933 | + public function getLogEntriesForSetId($set_id, $filter = '', $doFlush = FALSE, $doFullFlush = FALSE, $itemsPerPage = 10) { |
|
| 934 | 934 | // FIXME: Write Unit tests for Filters |
| 935 | - switch($filter) { |
|
| 935 | + switch ($filter) { |
|
| 936 | 936 | case 'pending': |
| 937 | 937 | $addWhere = ' AND exec_time=0'; |
| 938 | 938 | break; |
@@ -944,14 +944,14 @@ discard block |
||
| 944 | 944 | break; |
| 945 | 945 | } |
| 946 | 946 | // FIXME: Write unit test that ensures that the right records are deleted. |
| 947 | - if ($doFlush) { |
|
| 948 | - $this->flushQueue($doFullFlush?'':('set_id='.intval($set_id).$addWhere)); |
|
| 947 | + if ($doFlush) { |
|
| 948 | + $this->flushQueue($doFullFlush ? '' : ('set_id='.intval($set_id).$addWhere)); |
|
| 949 | 949 | return array(); |
| 950 | 950 | } else { |
| 951 | 951 | return $this->db->exec_SELECTgetRows('*', |
| 952 | 952 | 'tx_crawler_queue', |
| 953 | - 'set_id='.intval($set_id).$addWhere,'','scheduled DESC', |
|
| 954 | - (intval($itemsPerPage)>0 ? intval($itemsPerPage) : '')); |
|
| 953 | + 'set_id='.intval($set_id).$addWhere, '', 'scheduled DESC', |
|
| 954 | + (intval($itemsPerPage) > 0 ? intval($itemsPerPage) : '')); |
|
| 955 | 955 | } |
| 956 | 956 | } |
| 957 | 957 | |
@@ -961,14 +961,14 @@ discard block |
||
| 961 | 961 | * @param $where SQL related filter for the entries which should be removed |
| 962 | 962 | * @return void |
| 963 | 963 | */ |
| 964 | - protected function flushQueue($where='') { |
|
| 964 | + protected function flushQueue($where = '') { |
|
| 965 | 965 | |
| 966 | - $realWhere = strlen($where)>0?$where:'1=1'; |
|
| 966 | + $realWhere = strlen($where) > 0 ? $where : '1=1'; |
|
| 967 | 967 | |
| 968 | - if(tx_crawler_domain_events_dispatcher::getInstance()->hasObserver('queueEntryFlush')) { |
|
| 969 | - $groups = $this->db->exec_SELECTgetRows('DISTINCT set_id','tx_crawler_queue',$realWhere); |
|
| 970 | - foreach($groups as $group) { |
|
| 971 | - tx_crawler_domain_events_dispatcher::getInstance()->post('queueEntryFlush',$group['set_id'], $this->db->exec_SELECTgetRows('uid, set_id','tx_crawler_queue',$realWhere.' AND set_id="'.$group['set_id'].'"')); |
|
| 968 | + if (tx_crawler_domain_events_dispatcher::getInstance()->hasObserver('queueEntryFlush')) { |
|
| 969 | + $groups = $this->db->exec_SELECTgetRows('DISTINCT set_id', 'tx_crawler_queue', $realWhere); |
|
| 970 | + foreach ($groups as $group) { |
|
| 971 | + tx_crawler_domain_events_dispatcher::getInstance()->post('queueEntryFlush', $group['set_id'], $this->db->exec_SELECTgetRows('uid, set_id', 'tx_crawler_queue', $realWhere.' AND set_id="'.$group['set_id'].'"')); |
|
| 972 | 972 | } |
| 973 | 973 | } |
| 974 | 974 | |
@@ -985,7 +985,7 @@ discard block |
||
| 985 | 985 | * @param integer Time at which to activate |
| 986 | 986 | * @return void |
| 987 | 987 | */ |
| 988 | - public function addQueueEntry_callBack($setId,$params,$callBack,$page_id=0,$schedule=0) { |
|
| 988 | + public function addQueueEntry_callBack($setId, $params, $callBack, $page_id = 0, $schedule = 0) { |
|
| 989 | 989 | |
| 990 | 990 | if (!is_array($params)) $params = array(); |
| 991 | 991 | $params['_CALLBACKOBJ'] = $callBack; |
@@ -1000,7 +1000,7 @@ discard block |
||
| 1000 | 1000 | 'result_data' => '', |
| 1001 | 1001 | ); |
| 1002 | 1002 | |
| 1003 | - $this->db->exec_INSERTquery('tx_crawler_queue',$fieldArray); |
|
| 1003 | + $this->db->exec_INSERTquery('tx_crawler_queue', $fieldArray); |
|
| 1004 | 1004 | } |
| 1005 | 1005 | |
| 1006 | 1006 | /************************************ |
@@ -1020,13 +1020,13 @@ discard block |
||
| 1020 | 1020 | * @param bool (optional) skip inner duplication check |
| 1021 | 1021 | * @return bool true if the url was added, false if it already existed |
| 1022 | 1022 | */ |
| 1023 | - protected function addUrl ( |
|
| 1023 | + protected function addUrl( |
|
| 1024 | 1024 | $id, |
| 1025 | 1025 | $url, |
| 1026 | 1026 | array $subCfg, |
| 1027 | 1027 | $tstamp, |
| 1028 | - $configurationHash='', |
|
| 1029 | - $skipInnerDuplicationCheck=false |
|
| 1028 | + $configurationHash = '', |
|
| 1029 | + $skipInnerDuplicationCheck = false |
|
| 1030 | 1030 | ) { |
| 1031 | 1031 | |
| 1032 | 1032 | $urlAdded = false; |
@@ -1037,14 +1037,14 @@ discard block |
||
| 1037 | 1037 | ); |
| 1038 | 1038 | |
| 1039 | 1039 | // fe user group simulation: |
| 1040 | - $uGs = implode(',',array_unique(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',',$subCfg['userGroups'],1))); |
|
| 1041 | - if ($uGs) { |
|
| 1040 | + $uGs = implode(',', array_unique(\TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $subCfg['userGroups'], 1))); |
|
| 1041 | + if ($uGs) { |
|
| 1042 | 1042 | $parameters['feUserGroupList'] = $uGs; |
| 1043 | 1043 | } |
| 1044 | 1044 | |
| 1045 | 1045 | // Setting processing instructions |
| 1046 | - $parameters['procInstructions'] = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$subCfg['procInstrFilter']); |
|
| 1047 | - if (is_array($subCfg['procInstrParams.'])) { |
|
| 1046 | + $parameters['procInstructions'] = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']); |
|
| 1047 | + if (is_array($subCfg['procInstrParams.'])) { |
|
| 1048 | 1048 | $parameters['procInstrParams'] = $subCfg['procInstrParams.']; |
| 1049 | 1049 | } |
| 1050 | 1050 | |
@@ -1063,14 +1063,14 @@ discard block |
||
| 1063 | 1063 | 'configuration' => $subCfg['key'], |
| 1064 | 1064 | ); |
| 1065 | 1065 | |
| 1066 | - if ($this->registerQueueEntriesInternallyOnly) { |
|
| 1066 | + if ($this->registerQueueEntriesInternallyOnly) { |
|
| 1067 | 1067 | //the entries will only be registered and not stored to the database |
| 1068 | 1068 | $this->queueEntries[] = $fieldArray; |
| 1069 | 1069 | } else { |
| 1070 | 1070 | |
| 1071 | - if(!$skipInnerDuplicationCheck){ |
|
| 1071 | + if (!$skipInnerDuplicationCheck) { |
|
| 1072 | 1072 | // check if there is already an equal entry |
| 1073 | - $rows = $this->getDuplicateRowsIfExist($tstamp,$fieldArray); |
|
| 1073 | + $rows = $this->getDuplicateRowsIfExist($tstamp, $fieldArray); |
|
| 1074 | 1074 | } |
| 1075 | 1075 | |
| 1076 | 1076 | if (count($rows) == 0) { |
@@ -1078,9 +1078,9 @@ discard block |
||
| 1078 | 1078 | $uid = $this->db->sql_insert_id(); |
| 1079 | 1079 | $rows[] = $uid; |
| 1080 | 1080 | $urlAdded = true; |
| 1081 | - tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue',$this->setID,array('uid' => $uid, 'fieldArray' => $fieldArray)); |
|
| 1082 | - }else{ |
|
| 1083 | - tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue',$this->setID,array('rows' => $rows, 'fieldArray' => $fieldArray)); |
|
| 1081 | + tx_crawler_domain_events_dispatcher::getInstance()->post('urlAddedToQueue', $this->setID, array('uid' => $uid, 'fieldArray' => $fieldArray)); |
|
| 1082 | + } else { |
|
| 1083 | + tx_crawler_domain_events_dispatcher::getInstance()->post('duplicateUrlInQueue', $this->setID, array('rows' => $rows, 'fieldArray' => $fieldArray)); |
|
| 1084 | 1084 | } |
| 1085 | 1085 | } |
| 1086 | 1086 | |
@@ -1097,34 +1097,34 @@ discard block |
||
| 1097 | 1097 | * |
| 1098 | 1098 | * @return array; |
| 1099 | 1099 | */ |
| 1100 | - protected function getDuplicateRowsIfExist($tstamp,$fieldArray){ |
|
| 1100 | + protected function getDuplicateRowsIfExist($tstamp, $fieldArray) { |
|
| 1101 | 1101 | $rows = array(); |
| 1102 | 1102 | |
| 1103 | 1103 | $currentTime = $this->getCurrentTime(); |
| 1104 | 1104 | |
| 1105 | 1105 | //if this entry is scheduled with "now" |
| 1106 | 1106 | if ($tstamp <= $currentTime) { |
| 1107 | - if($this->extensionSettings['enableTimeslot']){ |
|
| 1107 | + if ($this->extensionSettings['enableTimeslot']) { |
|
| 1108 | 1108 | $timeBegin = $currentTime - 100; |
| 1109 | - $timeEnd = $currentTime + 100; |
|
| 1110 | - $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '. $currentTime.') '; |
|
| 1111 | - }else{ |
|
| 1112 | - $where = 'scheduled <= ' . $currentTime; |
|
| 1109 | + $timeEnd = $currentTime + 100; |
|
| 1110 | + $where = ' ((scheduled BETWEEN '.$timeBegin.' AND '.$timeEnd.' ) OR scheduled <= '.$currentTime.') '; |
|
| 1111 | + } else { |
|
| 1112 | + $where = 'scheduled <= '.$currentTime; |
|
| 1113 | 1113 | } |
| 1114 | 1114 | } elseif ($tstamp > $currentTime) { |
| 1115 | 1115 | //entry with a timestamp in the future need to have the same schedule time |
| 1116 | - $where = 'scheduled = ' . $tstamp ; |
|
| 1116 | + $where = 'scheduled = '.$tstamp; |
|
| 1117 | 1117 | } |
| 1118 | 1118 | |
| 1119 | - if(!empty($where)){ |
|
| 1119 | + if (!empty($where)) { |
|
| 1120 | 1120 | $result = $this->db->exec_SELECTgetRows( |
| 1121 | 1121 | 'qid', |
| 1122 | 1122 | 'tx_crawler_queue', |
| 1123 | 1123 | $where. |
| 1124 | - ' AND NOT exec_time' . |
|
| 1124 | + ' AND NOT exec_time'. |
|
| 1125 | 1125 | ' AND NOT process_id '. |
| 1126 | 1126 | ' AND page_id='.intval($fieldArray['page_id']). |
| 1127 | - ' AND parameters_hash = ' . $this->db->fullQuoteStr($fieldArray['parameters_hash'], 'tx_crawler_queue') |
|
| 1127 | + ' AND parameters_hash = '.$this->db->fullQuoteStr($fieldArray['parameters_hash'], 'tx_crawler_queue') |
|
| 1128 | 1128 | ); |
| 1129 | 1129 | |
| 1130 | 1130 | if (is_array($result)) { |
@@ -1145,7 +1145,7 @@ discard block |
||
| 1145 | 1145 | * |
| 1146 | 1146 | * @codeCoverageIgnore |
| 1147 | 1147 | */ |
public function getCurrentTime() {
	// Thin wrapper around PHP's time(): yields the current Unix timestamp (seconds).
	$now = time();

	return $now;
}
| 1151 | 1151 | |
@@ -1166,18 +1166,18 @@ discard block |
||
| 1166 | 1166 | public function readUrl($queueId, $force = FALSE) { |
| 1167 | 1167 | $ret = 0; |
| 1168 | 1168 | if ($this->debugMode) { |
| 1169 | - \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl start ' . microtime(true), __FUNCTION__); |
|
| 1169 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl start '.microtime(true), __FUNCTION__); |
|
| 1170 | 1170 | } |
| 1171 | 1171 | // Get entry: |
| 1172 | 1172 | list($queueRec) = $this->db->exec_SELECTgetRows('*', 'tx_crawler_queue', |
| 1173 | - 'qid=' . intval($queueId) . ($force ? '' : ' AND exec_time=0 AND process_scheduled > 0')); |
|
| 1173 | + 'qid='.intval($queueId).($force ? '' : ' AND exec_time=0 AND process_scheduled > 0')); |
|
| 1174 | 1174 | |
| 1175 | 1175 | if (!is_array($queueRec)) { |
| 1176 | 1176 | return; |
| 1177 | 1177 | } |
| 1178 | 1178 | |
| 1179 | - $pageUidRootTypoScript = \AOE\Crawler\Utility\TypoScriptUtility::getPageUidForTypoScriptRootTemplateInRootLine((int)$queueRec['page_id']); |
|
| 1180 | - $this->initTSFE((int)$pageUidRootTypoScript); |
|
| 1179 | + $pageUidRootTypoScript = \AOE\Crawler\Utility\TypoScriptUtility::getPageUidForTypoScriptRootTemplateInRootLine((int) $queueRec['page_id']); |
|
| 1180 | + $this->initTSFE((int) $pageUidRootTypoScript); |
|
| 1181 | 1181 | |
| 1182 | 1182 | \AOE\Crawler\Utility\SignalSlotUtility::emitSignal( |
| 1183 | 1183 | __CLASS__, |
@@ -1192,7 +1192,7 @@ discard block |
||
| 1192 | 1192 | //if multiprocessing is used we need to store the id of the process which has handled this entry |
| 1193 | 1193 | $field_array['process_id_completed'] = $this->processID; |
| 1194 | 1194 | } |
| 1195 | - $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array); |
|
| 1195 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
| 1196 | 1196 | |
| 1197 | 1197 | $result = $this->readUrl_exec($queueRec); |
| 1198 | 1198 | $resultData = unserialize($result['content']); |
@@ -1221,11 +1221,11 @@ discard block |
||
| 1221 | 1221 | array($queueId, &$field_array) |
| 1222 | 1222 | ); |
| 1223 | 1223 | |
| 1224 | - $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid=' . intval($queueId), $field_array); |
|
| 1224 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
| 1225 | 1225 | |
| 1226 | 1226 | |
| 1227 | 1227 | if ($this->debugMode) { |
| 1228 | - \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl stop ' . microtime(true), __FUNCTION__); |
|
| 1228 | + \TYPO3\CMS\Core\Utility\GeneralUtility::devlog('crawler-readurl stop '.microtime(true), __FUNCTION__); |
|
| 1229 | 1229 | } |
| 1230 | 1230 | |
| 1231 | 1231 | return $ret; |
@@ -1238,7 +1238,7 @@ discard block |
||
| 1238 | 1238 | * |
| 1239 | 1239 | * @return string |
| 1240 | 1240 | */ |
| 1241 | - protected function readUrlFromArray($field_array) { |
|
| 1241 | + protected function readUrlFromArray($field_array) { |
|
| 1242 | 1242 | |
| 1243 | 1243 | // Set exec_time to lock record: |
| 1244 | 1244 | $field_array['exec_time'] = $this->getCurrentTime(); |
@@ -1256,7 +1256,7 @@ discard block |
||
| 1256 | 1256 | array($queueId, &$field_array) |
| 1257 | 1257 | ); |
| 1258 | 1258 | |
| 1259 | - $this->db->exec_UPDATEquery('tx_crawler_queue','qid='.intval($queueId), $field_array); |
|
| 1259 | + $this->db->exec_UPDATEquery('tx_crawler_queue', 'qid='.intval($queueId), $field_array); |
|
| 1260 | 1260 | |
| 1261 | 1261 | return $result; |
| 1262 | 1262 | } |
@@ -1267,17 +1267,17 @@ discard block |
||
| 1267 | 1267 | * @param array $queueRec Queue record |
| 1268 | 1268 | * @return string Result output. |
| 1269 | 1269 | */ |
| 1270 | - protected function readUrl_exec($queueRec) { |
|
| 1270 | + protected function readUrl_exec($queueRec) { |
|
| 1271 | 1271 | // Decode parameters: |
| 1272 | 1272 | $parameters = unserialize($queueRec['parameters']); |
| 1273 | 1273 | $result = 'ERROR'; |
| 1274 | - if (is_array($parameters)) { |
|
| 1275 | - if ($parameters['_CALLBACKOBJ']) { // Calling object: |
|
| 1274 | + if (is_array($parameters)) { |
|
| 1275 | + if ($parameters['_CALLBACKOBJ']) { // Calling object: |
|
| 1276 | 1276 | $objRef = $parameters['_CALLBACKOBJ']; |
| 1277 | 1277 | $callBackObj = &\TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($objRef); |
| 1278 | - if (is_object($callBackObj)) { |
|
| 1278 | + if (is_object($callBackObj)) { |
|
| 1279 | 1279 | unset($parameters['_CALLBACKOBJ']); |
| 1280 | - $result = array('content' => serialize($callBackObj->crawler_execute($parameters,$this))); |
|
| 1280 | + $result = array('content' => serialize($callBackObj->crawler_execute($parameters, $this))); |
|
| 1281 | 1281 | } else { |
| 1282 | 1282 | $result = array('content' => 'No object: '.$objRef); |
| 1283 | 1283 | } |
@@ -1287,9 +1287,9 @@ discard block |
||
| 1287 | 1287 | $crawlerId = $queueRec['qid'].':'.md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey']); |
| 1288 | 1288 | |
| 1289 | 1289 | // Get result: |
| 1290 | - $result = $this->requestUrl($parameters['url'],$crawlerId); |
|
| 1290 | + $result = $this->requestUrl($parameters['url'], $crawlerId); |
|
| 1291 | 1291 | |
| 1292 | - tx_crawler_domain_events_dispatcher::getInstance()->post('urlCrawled',$queueRec['set_id'],array('url' => $parameters['url'], 'result' => $result)); |
|
| 1292 | + tx_crawler_domain_events_dispatcher::getInstance()->post('urlCrawled', $queueRec['set_id'], array('url' => $parameters['url'], 'result' => $result)); |
|
| 1293 | 1293 | } |
| 1294 | 1294 | } |
| 1295 | 1295 | |
@@ -1306,7 +1306,7 @@ discard block |
||
| 1306 | 1306 | * @param integer $recursion Recursion limiter for 302 redirects |
| 1307 | 1307 | * @return array Array with content |
| 1308 | 1308 | */ |
| 1309 | - public function requestUrl($originalUrl, $crawlerId, $timeout=2, $recursion=10) { |
|
| 1309 | + public function requestUrl($originalUrl, $crawlerId, $timeout = 2, $recursion = 10) { |
|
| 1310 | 1310 | |
| 1311 | 1311 | if (!$recursion) return false; |
| 1312 | 1312 | |
@@ -1318,7 +1318,7 @@ discard block |
||
| 1318 | 1318 | return FALSE; |
| 1319 | 1319 | } |
| 1320 | 1320 | |
| 1321 | - if (!in_array($url['scheme'], array('','http','https'))) { |
|
| 1321 | + if (!in_array($url['scheme'], array('', 'http', 'https'))) { |
|
| 1322 | 1322 | if (TYPO3_DLOG) \TYPO3\CMS\Core\Utility\GeneralUtility::devLog(sprintf('Scheme does not match for url "%s"', $url), 'crawler', 4, array('crawlerId' => $crawlerId)); |
| 1323 | 1323 | return FALSE; |
| 1324 | 1324 | } |
@@ -1336,14 +1336,14 @@ discard block |
||
| 1336 | 1336 | |
| 1337 | 1337 | if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['curlUse'] && $GLOBALS['TYPO3_CONF_VARS']['SYS']['curlProxyServer']) { |
| 1338 | 1338 | $rurl = parse_url($GLOBALS['TYPO3_CONF_VARS']['SYS']['curlProxyServer']); |
| 1339 | - $url['path'] = $url['scheme'] . '://' . $url['host'] . ($url['port'] > 0 ? ':' . $url['port'] : '') . $url['path']; |
|
| 1339 | + $url['path'] = $url['scheme'].'://'.$url['host'].($url['port'] > 0 ? ':'.$url['port'] : '').$url['path']; |
|
| 1340 | 1340 | $reqHeaders = $this->buildRequestHeaderArray($url, $crawlerId); |
| 1341 | 1341 | } |
| 1342 | 1342 | |
| 1343 | 1343 | $host = $rurl['host']; |
| 1344 | 1344 | |
| 1345 | 1345 | if ($url['scheme'] == 'https') { |
| 1346 | - $host = 'ssl://' . $host; |
|
| 1346 | + $host = 'ssl://'.$host; |
|
| 1347 | 1347 | $port = ($rurl['port'] > 0) ? $rurl['port'] : 443; |
| 1348 | 1348 | } else { |
| 1349 | 1349 | $port = ($rurl['port'] > 0) ? $rurl['port'] : 80; |
@@ -1357,24 +1357,24 @@ discard block |
||
| 1357 | 1357 | return FALSE; |
| 1358 | 1358 | } else { |
| 1359 | 1359 | // Request message: |
| 1360 | - $msg = implode("\r\n",$reqHeaders)."\r\n\r\n"; |
|
| 1361 | - fputs ($fp, $msg); |
|
| 1360 | + $msg = implode("\r\n", $reqHeaders)."\r\n\r\n"; |
|
| 1361 | + fputs($fp, $msg); |
|
| 1362 | 1362 | |
| 1363 | 1363 | // Read response: |
| 1364 | 1364 | $d = $this->getHttpResponseFromStream($fp); |
| 1365 | - fclose ($fp); |
|
| 1365 | + fclose($fp); |
|
| 1366 | 1366 | |
| 1367 | 1367 | $time = microtime(true) - $startTime; |
| 1368 | - $this->log($originalUrl .' '.$time); |
|
| 1368 | + $this->log($originalUrl.' '.$time); |
|
| 1369 | 1369 | |
| 1370 | 1370 | // Implode content and headers: |
| 1371 | 1371 | $result = array( |
| 1372 | 1372 | 'request' => $msg, |
| 1373 | 1373 | 'headers' => implode('', $d['headers']), |
| 1374 | - 'content' => implode('', (array)$d['content']) |
|
| 1374 | + 'content' => implode('', (array) $d['content']) |
|
| 1375 | 1375 | ); |
| 1376 | 1376 | |
| 1377 | - if (($this->extensionSettings['follow30x']) && ($newUrl = $this->getRequestUrlFrom302Header($d['headers'],$url['user'],$url['pass']))) { |
|
| 1377 | + if (($this->extensionSettings['follow30x']) && ($newUrl = $this->getRequestUrlFrom302Header($d['headers'], $url['user'], $url['pass']))) { |
|
| 1378 | 1378 | $result = array_merge(array('parentRequest'=>$result), $this->requestUrl($newUrl, $crawlerId, $recursion--)); |
| 1379 | 1379 | $newRequestUrl = $this->requestUrl($newUrl, $crawlerId, $timeout, --$recursion); |
| 1380 | 1380 | |
@@ -1413,8 +1413,8 @@ discard block |
||
| 1413 | 1413 | |
| 1414 | 1414 | // Base path must be '/<pathSegments>/': |
| 1415 | 1415 | if ($frontendBasePath != '/') { |
| 1416 | - $frontendBasePath = '/' . ltrim($frontendBasePath, '/'); |
|
| 1417 | - $frontendBasePath = rtrim($frontendBasePath, '/') . '/'; |
|
| 1416 | + $frontendBasePath = '/'.ltrim($frontendBasePath, '/'); |
|
| 1417 | + $frontendBasePath = rtrim($frontendBasePath, '/').'/'; |
|
| 1418 | 1418 | } |
| 1419 | 1419 | |
| 1420 | 1420 | return $frontendBasePath; |
@@ -1447,7 +1447,7 @@ discard block |
||
| 1447 | 1447 | |
| 1448 | 1448 | if (is_resource($streamPointer)) { |
| 1449 | 1449 | // read headers |
| 1450 | - while($line = fgets($streamPointer, '2048')) { |
|
| 1450 | + while ($line = fgets($streamPointer, '2048')) { |
|
| 1451 | 1451 | $line = trim($line); |
| 1452 | 1452 | if ($line !== '') { |
| 1453 | 1453 | $response['headers'][] = $line; |
@@ -1457,7 +1457,7 @@ discard block |
||
| 1457 | 1457 | } |
| 1458 | 1458 | |
| 1459 | 1459 | // read content |
| 1460 | - while($line = fgets($streamPointer, '2048')) { |
|
| 1460 | + while ($line = fgets($streamPointer, '2048')) { |
|
| 1461 | 1461 | $response['content'][] = $line; |
| 1462 | 1462 | } |
| 1463 | 1463 | } |
@@ -1470,7 +1470,7 @@ discard block |
||
| 1470 | 1470 | */ |
protected function log($message) {
	// Logging is opt-in: with no (or an empty) 'logFileName' setting nothing is written.
	if (empty($this->extensionSettings['logFileName'])) {
		return;
	}

	$logFile = $this->extensionSettings['logFileName'];

	// One entry per line: a "Ymd His" timestamp directly followed by the message text.
	$entry = date('Ymd His').$message."\n";

	// Append to the file; the @ operator silences any warning raised by the write.
	@file_put_contents($logFile, $entry, FILE_APPEND);
}
| 1476 | 1476 | |
@@ -1486,12 +1486,12 @@ discard block |
||
| 1486 | 1486 | $reqHeaders = array(); |
| 1487 | 1487 | $reqHeaders[] = 'GET '.$url['path'].($url['query'] ? '?'.$url['query'] : '').' HTTP/1.0'; |
| 1488 | 1488 | $reqHeaders[] = 'Host: '.$url['host']; |
| 1489 | - if (stristr($url['query'],'ADMCMD_previewWS')) { |
|
| 1489 | + if (stristr($url['query'], 'ADMCMD_previewWS')) { |
|
| 1490 | 1490 | $reqHeaders[] = 'Cookie: $Version="1"; be_typo_user="1"; $Path=/'; |
| 1491 | 1491 | } |
| 1492 | 1492 | $reqHeaders[] = 'Connection: close'; |
| 1493 | - if ($url['user']!='') { |
|
| 1494 | - $reqHeaders[] = 'Authorization: Basic '. base64_encode($url['user'].':'.$url['pass']); |
|
| 1493 | + if ($url['user'] != '') { |
|
| 1494 | + $reqHeaders[] = 'Authorization: Basic '.base64_encode($url['user'].':'.$url['pass']); |
|
| 1495 | 1495 | } |
| 1496 | 1496 | $reqHeaders[] = 'X-T3crawler: '.$crawlerId; |
| 1497 | 1497 | $reqHeaders[] = 'User-Agent: TYPO3 crawler'; |
@@ -1506,21 +1506,21 @@ discard block |
||
| 1506 | 1506 | * @param string HTTP Auth. Password |
| 1507 | 1507 | * @return string URL from redirection |
| 1508 | 1508 | */ |
| 1509 | - protected function getRequestUrlFrom302Header($headers,$user='',$pass='') { |
|
| 1510 | - if(!is_array($headers)) return false; |
|
| 1511 | - if(!(stristr($headers[0],'301 Moved') || stristr($headers[0],'302 Found') || stristr($headers[0],'302 Moved'))) return false; |
|
| 1509 | + protected function getRequestUrlFrom302Header($headers, $user = '', $pass = '') { |
|
| 1510 | + if (!is_array($headers)) return false; |
|
| 1511 | + if (!(stristr($headers[0], '301 Moved') || stristr($headers[0], '302 Found') || stristr($headers[0], '302 Moved'))) return false; |
|
| 1512 | 1512 | |
| 1513 | - foreach($headers as $hl) { |
|
| 1514 | - $tmp = explode(": ",$hl); |
|
| 1513 | + foreach ($headers as $hl) { |
|
| 1514 | + $tmp = explode(": ", $hl); |
|
| 1515 | 1515 | $header[trim($tmp[0])] = trim($tmp[1]); |
| 1516 | - if(trim($tmp[0])=='Location') break; |
|
| 1516 | + if (trim($tmp[0]) == 'Location') break; |
|
| 1517 | 1517 | } |
| 1518 | - if(!array_key_exists('Location',$header)) return false; |
|
| 1518 | + if (!array_key_exists('Location', $header)) return false; |
|
| 1519 | 1519 | |
| 1520 | - if($user!='') { |
|
| 1521 | - if(!($tmp = parse_url($header['Location']))) return false; |
|
| 1522 | - $newUrl = $tmp['scheme'] . '://' . $user . ':' . $pass . '@' . $tmp['host'] . $tmp['path']; |
|
| 1523 | - if($tmp['query']!='') $newUrl .= '?' . $tmp['query']; |
|
| 1520 | + if ($user != '') { |
|
| 1521 | + if (!($tmp = parse_url($header['Location']))) return false; |
|
| 1522 | + $newUrl = $tmp['scheme'].'://'.$user.':'.$pass.'@'.$tmp['host'].$tmp['path']; |
|
| 1523 | + if ($tmp['query'] != '') $newUrl .= '?'.$tmp['query']; |
|
| 1524 | 1524 | } else { |
| 1525 | 1525 | $newUrl = $header['Location']; |
| 1526 | 1526 | } |
@@ -1541,15 +1541,15 @@ discard block |
||
| 1541 | 1541 | * @param object TSFE object (reference under PHP5) |
| 1542 | 1542 | * @return void |
| 1543 | 1543 | */ |
| 1544 | - function fe_init(&$params, $ref) { |
|
| 1544 | + function fe_init(&$params, $ref) { |
|
| 1545 | 1545 | |
| 1546 | 1546 | // Authenticate crawler request: |
| 1547 | - if (isset($_SERVER['HTTP_X_T3CRAWLER'])) { |
|
| 1548 | - list($queueId,$hash) = explode(':', $_SERVER['HTTP_X_T3CRAWLER']); |
|
| 1549 | - list($queueRec) = $this->db->exec_SELECTgetRows('*','tx_crawler_queue','qid='.intval($queueId)); |
|
| 1547 | + if (isset($_SERVER['HTTP_X_T3CRAWLER'])) { |
|
| 1548 | + list($queueId, $hash) = explode(':', $_SERVER['HTTP_X_T3CRAWLER']); |
|
| 1549 | + list($queueRec) = $this->db->exec_SELECTgetRows('*', 'tx_crawler_queue', 'qid='.intval($queueId)); |
|
| 1550 | 1550 | |
| 1551 | 1551 | // If a crawler record was found and hash was matching, set it up: |
| 1552 | - if (is_array($queueRec) && $hash === md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'])) { |
|
| 1552 | + if (is_array($queueRec) && $hash === md5($queueRec['qid'].'|'.$queueRec['set_id'].'|'.$GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey'])) { |
|
| 1553 | 1553 | $params['pObj']->applicationData['tx_crawler']['running'] = TRUE; |
| 1554 | 1554 | $params['pObj']->applicationData['tx_crawler']['parameters'] = unserialize($queueRec['parameters']); |
| 1555 | 1555 | $params['pObj']->applicationData['tx_crawler']['log'] = array(); |
@@ -1607,7 +1607,7 @@ discard block |
||
| 1607 | 1607 | /* @var $tree \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
| 1608 | 1608 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
| 1609 | 1609 | $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1); |
| 1610 | - $tree->init('AND ' . $perms_clause); |
|
| 1610 | + $tree->init('AND '.$perms_clause); |
|
| 1611 | 1611 | |
| 1612 | 1612 | $pageinfo = \TYPO3\CMS\Backend\Utility\BackendUtility::readPageAccess($id, $perms_clause); |
| 1613 | 1613 | |
@@ -1618,7 +1618,7 @@ discard block |
||
| 1618 | 1618 | ); |
| 1619 | 1619 | |
| 1620 | 1620 | // Get branch beneath: |
| 1621 | - if ($depth) { |
|
| 1621 | + if ($depth) { |
|
| 1622 | 1622 | $tree->getTree($id, $depth, ''); |
| 1623 | 1623 | } |
| 1624 | 1624 | |
@@ -1630,7 +1630,7 @@ discard block |
||
| 1630 | 1630 | $this->MP = false; |
| 1631 | 1631 | |
| 1632 | 1632 | // recognize mount points |
| 1633 | - if($data['row']['doktype'] == 7){ |
|
| 1633 | + if ($data['row']['doktype'] == 7) { |
|
| 1634 | 1634 | $mountpage = $this->db->exec_SELECTgetRows('*', 'pages', 'uid = '.$data['row']['uid']); |
| 1635 | 1635 | |
| 1636 | 1636 | // fetch mounted pages |
@@ -1640,15 +1640,15 @@ discard block |
||
| 1640 | 1640 | $mountTree->init('AND '.$perms_clause); |
| 1641 | 1641 | $mountTree->getTree($mountpage[0]['mount_pid'], $depth, ''); |
| 1642 | 1642 | |
| 1643 | - foreach($mountTree->tree as $mountData) { |
|
| 1643 | + foreach ($mountTree->tree as $mountData) { |
|
| 1644 | 1644 | $code .= $this->drawURLs_addRowsForPage( |
| 1645 | 1645 | $mountData['row'], |
| 1646 | - $mountData['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages',$mountData['row'],TRUE) |
|
| 1646 | + $mountData['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $mountData['row'], TRUE) |
|
| 1647 | 1647 | ); |
| 1648 | 1648 | } |
| 1649 | 1649 | |
| 1650 | 1650 | // replace page when mount_pid_ol is enabled |
| 1651 | - if($mountpage[0]['mount_pid_ol']){ |
|
| 1651 | + if ($mountpage[0]['mount_pid_ol']) { |
|
| 1652 | 1652 | $data['row']['uid'] = $mountpage[0]['mount_pid']; |
| 1653 | 1653 | } else { |
| 1654 | 1654 | // if the mount_pid_ol is not set the MP must not be used for the mountpoint page |
@@ -1658,7 +1658,7 @@ discard block |
||
| 1658 | 1658 | |
| 1659 | 1659 | $code .= $this->drawURLs_addRowsForPage( |
| 1660 | 1660 | $data['row'], |
| 1661 | - $data['HTML'] . \TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $data['row'], TRUE) |
|
| 1661 | + $data['HTML'].\TYPO3\CMS\Backend\Utility\BackendUtility::getRecordTitle('pages', $data['row'], TRUE) |
|
| 1662 | 1662 | ); |
| 1663 | 1663 | } |
| 1664 | 1664 | |
@@ -1682,7 +1682,7 @@ discard block |
||
| 1682 | 1682 | if (!empty($excludeString)) { |
| 1683 | 1683 | /* @var $tree \TYPO3\CMS\Backend\Tree\View\PageTreeView */ |
| 1684 | 1684 | $tree = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\CMS\Backend\Tree\View\PageTreeView'); |
| 1685 | - $tree->init('AND ' . $this->backendUser->getPagePermsClause(1)); |
|
| 1685 | + $tree->init('AND '.$this->backendUser->getPagePermsClause(1)); |
|
| 1686 | 1686 | |
| 1687 | 1687 | $excludeParts = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $excludeString); |
| 1688 | 1688 | |
@@ -1691,7 +1691,7 @@ discard block |
||
| 1691 | 1691 | |
| 1692 | 1692 | // default is "page only" = "depth=0" |
| 1693 | 1693 | if (empty($depth)) { |
| 1694 | - $depth = ( stristr($excludePart,'+')) ? 99 : 0; |
|
| 1694 | + $depth = (stristr($excludePart, '+')) ? 99 : 0; |
|
| 1695 | 1695 | } |
| 1696 | 1696 | |
| 1697 | 1697 | $pidList[] = $pid; |
@@ -1724,7 +1724,7 @@ discard block |
||
| 1724 | 1724 | * @param string Page icon and title for row |
| 1725 | 1725 | * @return string HTML <tr> content (one or more) |
| 1726 | 1726 | */ |
| 1727 | - public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
|
| 1727 | + public function drawURLs_addRowsForPage(array $pageRow, $pageTitleAndIcon) { |
|
| 1728 | 1728 | |
| 1729 | 1729 | $skipMessage = ''; |
| 1730 | 1730 | |
@@ -1745,7 +1745,7 @@ discard block |
||
| 1745 | 1745 | $cc = 0; |
| 1746 | 1746 | $content = ''; |
| 1747 | 1747 | if (count($configurations)) { |
| 1748 | - foreach($configurations as $confKey => $confArray) { |
|
| 1748 | + foreach ($configurations as $confKey => $confArray) { |
|
| 1749 | 1749 | |
| 1750 | 1750 | // Title column: |
| 1751 | 1751 | if (!$c) { |
@@ -1774,47 +1774,47 @@ discard block |
||
| 1774 | 1774 | $paramExpanded = ''; |
| 1775 | 1775 | $calcAccu = array(); |
| 1776 | 1776 | $calcRes = 1; |
| 1777 | - foreach($confArray['paramExpanded'] as $gVar => $gVal) { |
|
| 1778 | - $paramExpanded.= ' |
|
| 1777 | + foreach ($confArray['paramExpanded'] as $gVar => $gVal) { |
|
| 1778 | + $paramExpanded .= ' |
|
| 1779 | 1779 | <tr> |
| 1780 | 1780 | <td class="bgColor4-20">'.htmlspecialchars('&'.$gVar.'=').'<br/>'. |
| 1781 | 1781 | '('.count($gVal).')'. |
| 1782 | 1782 | '</td> |
| 1783 | - <td class="bgColor4" nowrap="nowrap">'.nl2br(htmlspecialchars(implode(chr(10),$gVal))).'</td> |
|
| 1783 | + <td class="bgColor4" nowrap="nowrap">'.nl2br(htmlspecialchars(implode(chr(10), $gVal))).'</td> |
|
| 1784 | 1784 | </tr> |
| 1785 | 1785 | '; |
| 1786 | - $calcRes*= count($gVal); |
|
| 1786 | + $calcRes *= count($gVal); |
|
| 1787 | 1787 | $calcAccu[] = count($gVal); |
| 1788 | 1788 | } |
| 1789 | 1789 | $paramExpanded = '<table class="lrPadding c-list param-expanded">'.$paramExpanded.'</table>'; |
| 1790 | - $paramExpanded.= 'Comb: '.implode('*',$calcAccu).'='.$calcRes; |
|
| 1790 | + $paramExpanded .= 'Comb: '.implode('*', $calcAccu).'='.$calcRes; |
|
| 1791 | 1791 | |
| 1792 | 1792 | // Options |
| 1793 | 1793 | $optionValues = ''; |
| 1794 | - if ($confArray['subCfg']['userGroups']) { |
|
| 1795 | - $optionValues.='User Groups: '.$confArray['subCfg']['userGroups'].'<br/>'; |
|
| 1794 | + if ($confArray['subCfg']['userGroups']) { |
|
| 1795 | + $optionValues .= 'User Groups: '.$confArray['subCfg']['userGroups'].'<br/>'; |
|
| 1796 | 1796 | } |
| 1797 | - if ($confArray['subCfg']['baseUrl']) { |
|
| 1798 | - $optionValues.='Base Url: '.$confArray['subCfg']['baseUrl'].'<br/>'; |
|
| 1797 | + if ($confArray['subCfg']['baseUrl']) { |
|
| 1798 | + $optionValues .= 'Base Url: '.$confArray['subCfg']['baseUrl'].'<br/>'; |
|
| 1799 | 1799 | } |
| 1800 | - if ($confArray['subCfg']['procInstrFilter']) { |
|
| 1801 | - $optionValues.='ProcInstr: '.$confArray['subCfg']['procInstrFilter'].'<br/>'; |
|
| 1800 | + if ($confArray['subCfg']['procInstrFilter']) { |
|
| 1801 | + $optionValues .= 'ProcInstr: '.$confArray['subCfg']['procInstrFilter'].'<br/>'; |
|
| 1802 | 1802 | } |
| 1803 | 1803 | |
| 1804 | 1804 | // Compile row: |
| 1805 | 1805 | $content .= ' |
| 1806 | - <tr class="bgColor' . ($c%2 ? '-20':'-10') . '"> |
|
| 1807 | - ' . $titleClm . ' |
|
| 1808 | - <td>' . htmlspecialchars($confKey) . '</td> |
|
| 1809 | - <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10) . '&', \TYPO3\CMS\Core\Utility\GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))) . '</td> |
|
| 1806 | + <tr class="bgColor' . ($c % 2 ? '-20' : '-10').'"> |
|
| 1807 | + ' . $titleClm.' |
|
| 1808 | + <td>' . htmlspecialchars($confKey).'</td> |
|
| 1809 | + <td>' . nl2br(htmlspecialchars(rawurldecode(trim(str_replace('&', chr(10).'&', \TYPO3\CMS\Core\Utility\GeneralUtility::implodeArrayForUrl('', $confArray['paramParsed'])))))).'</td> |
|
| 1810 | 1810 | <td>'.$paramExpanded.'</td> |
| 1811 | - <td nowrap="nowrap">' . $urlList . '</td> |
|
| 1812 | - <td nowrap="nowrap">' . $optionValues . '</td> |
|
| 1813 | - <td nowrap="nowrap">' . \TYPO3\CMS\Core\Utility\DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']) . '</td> |
|
| 1811 | + <td nowrap="nowrap">' . $urlList.'</td> |
|
| 1812 | + <td nowrap="nowrap">' . $optionValues.'</td> |
|
| 1813 | + <td nowrap="nowrap">' . \TYPO3\CMS\Core\Utility\DebugUtility::viewArray($confArray['subCfg']['procInstrParams.']).'</td> |
|
| 1814 | 1814 | </tr>'; |
| 1815 | 1815 | } else { |
| 1816 | 1816 | |
| 1817 | - $content .= '<tr class="bgColor'.($c%2 ? '-20':'-10') . '"> |
|
| 1817 | + $content .= '<tr class="bgColor'.($c % 2 ? '-20' : '-10').'"> |
|
| 1818 | 1818 | '.$titleClm.' |
| 1819 | 1819 | <td>'.htmlspecialchars($confKey).'</td> |
| 1820 | 1820 | <td colspan="5"><em>No entries</em> (Page is excluded in this configuration)</td> |
@@ -1829,7 +1829,7 @@ discard block |
||
| 1829 | 1829 | $message = !empty($skipMessage) ? ' ('.$skipMessage.')' : ''; |
| 1830 | 1830 | |
| 1831 | 1831 | // Compile row: |
| 1832 | - $content.= ' |
|
| 1832 | + $content .= ' |
|
| 1833 | 1833 | <tr class="bgColor-20" style="border-bottom: 1px solid black;"> |
| 1834 | 1834 | <td>'.$pageTitleAndIcon.'</td> |
| 1835 | 1835 | <td colspan="6"><em>No entries</em>'.$message.'</td> |
@@ -1890,7 +1890,7 @@ discard block |
||
| 1890 | 1890 | // Run process: |
| 1891 | 1891 | $result = $this->CLI_run($countInARun, $sleepTime, $sleepAfterFinish); |
| 1892 | 1892 | } catch (Exception $e) { |
| 1893 | - $this->CLI_debug(get_class($e) . ': ' . $e->getMessage()); |
|
| 1893 | + $this->CLI_debug(get_class($e).': '.$e->getMessage()); |
|
| 1894 | 1894 | $result = self::CLI_STATUS_ABORTED; |
| 1895 | 1895 | } |
| 1896 | 1896 | |
@@ -1901,7 +1901,7 @@ discard block |
||
| 1901 | 1901 | $releaseStatus = $this->CLI_releaseProcesses($this->CLI_buildProcessId()); |
| 1902 | 1902 | |
| 1903 | 1903 | $this->CLI_debug("Unprocessed Items remaining:".$this->getUnprocessedItemsCount()." (".$this->CLI_buildProcessId().")"); |
| 1904 | - $result |= ( $this->getUnprocessedItemsCount() > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED ); |
|
| 1904 | + $result |= ($this->getUnprocessedItemsCount() > 0 ? self::CLI_STATUS_REMAIN : self::CLI_STATUS_NOTHING_PROCCESSED); |
|
| 1905 | 1905 | } else { |
| 1906 | 1906 | $result |= self::CLI_STATUS_ABORTED; |
| 1907 | 1907 | } |
@@ -1914,7 +1914,7 @@ discard block |
||
| 1914 | 1914 | * |
| 1915 | 1915 | * @return void |
| 1916 | 1916 | */ |
| 1917 | - function CLI_main_im() { |
|
| 1917 | + function CLI_main_im() { |
|
| 1918 | 1918 | $this->setAccessMode('cli_im'); |
| 1919 | 1919 | |
| 1920 | 1920 | $cliObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_cli_im'); |
@@ -1924,7 +1924,7 @@ discard block |
||
| 1924 | 1924 | $this->backendUser->setWorkspace(0); |
| 1925 | 1925 | |
| 1926 | 1926 | // Print help |
| 1927 | - if (!isset($cliObj->cli_args['_DEFAULT'][1])) { |
|
| 1927 | + if (!isset($cliObj->cli_args['_DEFAULT'][1])) { |
|
| 1928 | 1928 | $cliObj->cli_validateArgs(); |
| 1929 | 1929 | $cliObj->cli_help(); |
| 1930 | 1930 | exit; |
@@ -1932,8 +1932,8 @@ discard block |
||
| 1932 | 1932 | |
| 1933 | 1933 | $cliObj->cli_validateArgs(); |
| 1934 | 1934 | |
| 1935 | - if ($cliObj->cli_argValue('-o')==='exec') { |
|
| 1936 | - $this->registerQueueEntriesInternallyOnly=TRUE; |
|
| 1935 | + if ($cliObj->cli_argValue('-o') === 'exec') { |
|
| 1936 | + $this->registerQueueEntriesInternallyOnly = TRUE; |
|
| 1937 | 1937 | } |
| 1938 | 1938 | |
| 1939 | 1939 | if (isset($cliObj->cli_args['_DEFAULT'][2])) { |
@@ -1946,16 +1946,16 @@ discard block |
||
| 1946 | 1946 | |
| 1947 | 1947 | $configurationKeys = $this->getConfigurationKeys($cliObj); |
| 1948 | 1948 | |
| 1949 | - if(!is_array($configurationKeys)){ |
|
| 1949 | + if (!is_array($configurationKeys)) { |
|
| 1950 | 1950 | $configurations = $this->getUrlsForPageId($pageId); |
| 1951 | - if(is_array($configurations)){ |
|
| 1951 | + if (is_array($configurations)) { |
|
| 1952 | 1952 | $configurationKeys = array_keys($configurations); |
| 1953 | - }else{ |
|
| 1953 | + } else { |
|
| 1954 | 1954 | $configurationKeys = array(); |
| 1955 | 1955 | } |
| 1956 | 1956 | } |
| 1957 | 1957 | |
| 1958 | - if($cliObj->cli_argValue('-o')==='queue' || $cliObj->cli_argValue('-o')==='exec'){ |
|
| 1958 | + if ($cliObj->cli_argValue('-o') === 'queue' || $cliObj->cli_argValue('-o') === 'exec') { |
|
| 1959 | 1959 | |
| 1960 | 1960 | $reason = new tx_crawler_domain_reason(); |
| 1961 | 1961 | $reason->setReason(tx_crawler_domain_reason::REASON_GUI_SUBMIT); |
@@ -1963,7 +1963,7 @@ discard block |
||
| 1963 | 1963 | tx_crawler_domain_events_dispatcher::getInstance()->post( |
| 1964 | 1964 | 'invokeQueueChange', |
| 1965 | 1965 | $this->setID, |
| 1966 | - array( 'reason' => $reason ) |
|
| 1966 | + array('reason' => $reason) |
|
| 1967 | 1967 | ); |
| 1968 | 1968 | } |
| 1969 | 1969 | |
@@ -1974,42 +1974,42 @@ discard block |
||
| 1974 | 1974 | $this->setID = \TYPO3\CMS\Core\Utility\GeneralUtility::md5int(microtime()); |
| 1975 | 1975 | $this->getPageTreeAndUrls( |
| 1976 | 1976 | $pageId, |
| 1977 | - \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_argValue('-d'),0,99), |
|
| 1977 | + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_argValue('-d'), 0, 99), |
|
| 1978 | 1978 | $this->getCurrentTime(), |
| 1979 | - \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30,1,1000), |
|
| 1980 | - $cliObj->cli_argValue('-o')==='queue' || $cliObj->cli_argValue('-o')==='exec', |
|
| 1981 | - $cliObj->cli_argValue('-o')==='url', |
|
| 1982 | - \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',',$cliObj->cli_argValue('-proc'),1), |
|
| 1979 | + \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_isArg('-n') ? $cliObj->cli_argValue('-n') : 30, 1, 1000), |
|
| 1980 | + $cliObj->cli_argValue('-o') === 'queue' || $cliObj->cli_argValue('-o') === 'exec', |
|
| 1981 | + $cliObj->cli_argValue('-o') === 'url', |
|
| 1982 | + \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $cliObj->cli_argValue('-proc'), 1), |
|
| 1983 | 1983 | $configurationKeys |
| 1984 | 1984 | ); |
| 1985 | 1985 | |
| 1986 | - if ($cliObj->cli_argValue('-o')==='url') { |
|
| 1987 | - $cliObj->cli_echo(implode(chr(10),$this->downloadUrls).chr(10),1); |
|
| 1988 | - } elseif ($cliObj->cli_argValue('-o')==='exec') { |
|
| 1986 | + if ($cliObj->cli_argValue('-o') === 'url') { |
|
| 1987 | + $cliObj->cli_echo(implode(chr(10), $this->downloadUrls).chr(10), 1); |
|
| 1988 | + } elseif ($cliObj->cli_argValue('-o') === 'exec') { |
|
| 1989 | 1989 | $cliObj->cli_echo("Executing ".count($this->urlList)." requests right away:\n\n"); |
| 1990 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10)); |
|
| 1990 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10)); |
|
| 1991 | 1991 | $cliObj->cli_echo("\nProcessing:\n"); |
| 1992 | 1992 | |
| 1993 | - foreach($this->queueEntries as $queueRec) { |
|
| 1993 | + foreach ($this->queueEntries as $queueRec) { |
|
| 1994 | 1994 | $p = unserialize($queueRec['parameters']); |
| 1995 | - $cliObj->cli_echo($p['url'].' ('.implode(',',$p['procInstructions']).') => '); |
|
| 1995 | + $cliObj->cli_echo($p['url'].' ('.implode(',', $p['procInstructions']).') => '); |
|
| 1996 | 1996 | |
| 1997 | 1997 | $result = $this->readUrlFromArray($queueRec); |
| 1998 | 1998 | |
| 1999 | 1999 | $requestResult = unserialize($result['content']); |
| 2000 | - if (is_array($requestResult)) { |
|
| 2001 | - $resLog = is_array($requestResult['log']) ? chr(10).chr(9).chr(9).implode(chr(10).chr(9).chr(9),$requestResult['log']) : ''; |
|
| 2000 | + if (is_array($requestResult)) { |
|
| 2001 | + $resLog = is_array($requestResult['log']) ? chr(10).chr(9).chr(9).implode(chr(10).chr(9).chr(9), $requestResult['log']) : ''; |
|
| 2002 | 2002 | $cliObj->cli_echo('OK: '.$resLog.chr(10)); |
| 2003 | 2003 | } else { |
| 2004 | - $cliObj->cli_echo('Error checking Crawler Result: '.substr(preg_replace('/\s+/',' ',strip_tags($result['content'])),0,30000).'...'.chr(10)); |
|
| 2004 | + $cliObj->cli_echo('Error checking Crawler Result: '.substr(preg_replace('/\s+/', ' ', strip_tags($result['content'])), 0, 30000).'...'.chr(10)); |
|
| 2005 | 2005 | } |
| 2006 | 2006 | } |
| 2007 | - } elseif ($cliObj->cli_argValue('-o')==='queue') { |
|
| 2007 | + } elseif ($cliObj->cli_argValue('-o') === 'queue') { |
|
| 2008 | 2008 | $cliObj->cli_echo("Putting ".count($this->urlList)." entries in queue:\n\n"); |
| 2009 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10)); |
|
| 2009 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10)); |
|
| 2010 | 2010 | } else { |
| 2011 | - $cliObj->cli_echo(count($this->urlList)." entries found for processing. (Use -o to decide action):\n\n",1); |
|
| 2012 | - $cliObj->cli_echo(implode(chr(10),$this->urlList).chr(10),1); |
|
| 2011 | + $cliObj->cli_echo(count($this->urlList)." entries found for processing. (Use -o to decide action):\n\n", 1); |
|
| 2012 | + $cliObj->cli_echo(implode(chr(10), $this->urlList).chr(10), 1); |
|
| 2013 | 2013 | } |
| 2014 | 2014 | } |
| 2015 | 2015 | |
@@ -2034,12 +2034,12 @@ discard block |
||
| 2034 | 2034 | } |
| 2035 | 2035 | |
| 2036 | 2036 | $cliObj->cli_validateArgs(); |
| 2037 | - $pageId = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1],0); |
|
| 2037 | + $pageId = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($cliObj->cli_args['_DEFAULT'][1], 0); |
|
| 2038 | 2038 | $fullFlush = ($pageId == 0); |
| 2039 | 2039 | |
| 2040 | 2040 | $mode = $cliObj->cli_argValue('-o'); |
| 2041 | 2041 | |
| 2042 | - switch($mode) { |
|
| 2042 | + switch ($mode) { |
|
| 2043 | 2043 | case 'all': |
| 2044 | 2044 | $result = $this->getLogEntriesForPageId($pageId, '', true, $fullFlush); |
| 2045 | 2045 | break; |
@@ -2062,7 +2062,7 @@ discard block |
||
| 2062 | 2062 | * @param tx_crawler_cli_im $cliObj Command line object |
| 2063 | 2063 | * @return mixed Array of keys or null if no keys found |
| 2064 | 2064 | */ |
| 2065 | - protected function getConfigurationKeys(tx_crawler_cli_im &$cliObj) { |
|
| 2065 | + protected function getConfigurationKeys(tx_crawler_cli_im & $cliObj) { |
|
| 2066 | 2066 | $parameter = trim($cliObj->cli_argValue('-conf')); |
| 2067 | 2067 | return ($parameter != '' ? \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(',', $parameter) : array()); |
| 2068 | 2068 | } |
@@ -2087,7 +2087,7 @@ discard block |
||
| 2087 | 2087 | $purgeDate = $this->getCurrentTime() - 24 * 60 * 60 * intval($this->extensionSettings['purgeQueueDays']); |
| 2088 | 2088 | $del = $this->db->exec_DELETEquery( |
| 2089 | 2089 | 'tx_crawler_queue', |
| 2090 | - 'exec_time!=0 AND exec_time<' . $purgeDate |
|
| 2090 | + 'exec_time!=0 AND exec_time<'.$purgeDate |
|
| 2091 | 2091 | ); |
| 2092 | 2092 | } |
| 2093 | 2093 | |
@@ -2104,10 +2104,10 @@ discard block |
||
| 2104 | 2104 | intval($countInARun) |
| 2105 | 2105 | ); |
| 2106 | 2106 | |
| 2107 | - if (count($rows)>0) { |
|
| 2107 | + if (count($rows) > 0) { |
|
| 2108 | 2108 | $quidList = array(); |
| 2109 | 2109 | |
| 2110 | - foreach($rows as $r) { |
|
| 2110 | + foreach ($rows as $r) { |
|
| 2111 | 2111 | $quidList[] = $r['qid']; |
| 2112 | 2112 | } |
| 2113 | 2113 | |
@@ -2118,7 +2118,7 @@ discard block |
||
| 2118 | 2118 | //TODO make sure we're not taking assigned queue-entires |
| 2119 | 2119 | $this->db->exec_UPDATEquery( |
| 2120 | 2120 | 'tx_crawler_queue', |
| 2121 | - 'qid IN ('.implode(',',$quidList).')', |
|
| 2121 | + 'qid IN ('.implode(',', $quidList).')', |
|
| 2122 | 2122 | array( |
| 2123 | 2123 | 'process_scheduled' => intval($this->getCurrentTime()), |
| 2124 | 2124 | 'process_id' => $processId |
@@ -2129,32 +2129,32 @@ discard block |
||
| 2129 | 2129 | $numberOfAffectedRows = $this->db->sql_affected_rows(); |
| 2130 | 2130 | $this->db->exec_UPDATEquery( |
| 2131 | 2131 | 'tx_crawler_process', |
| 2132 | - "process_id = '".$processId."'" , |
|
| 2132 | + "process_id = '".$processId."'", |
|
| 2133 | 2133 | array( |
| 2134 | 2134 | 'assigned_items_count' => intval($numberOfAffectedRows) |
| 2135 | 2135 | ) |
| 2136 | 2136 | ); |
| 2137 | 2137 | |
| 2138 | - if($numberOfAffectedRows == count($quidList)) { |
|
| 2138 | + if ($numberOfAffectedRows == count($quidList)) { |
|
| 2139 | 2139 | $this->db->sql_query('COMMIT'); |
| 2140 | - } else { |
|
| 2140 | + } else { |
|
| 2141 | 2141 | $this->db->sql_query('ROLLBACK'); |
| 2142 | 2142 | $this->CLI_debug("Nothing processed due to multi-process collision (".$this->CLI_buildProcessId().")"); |
| 2143 | - return ( $result | self::CLI_STATUS_ABORTED ); |
|
| 2143 | + return ($result | self::CLI_STATUS_ABORTED); |
|
| 2144 | 2144 | } |
| 2145 | 2145 | |
| 2146 | 2146 | |
| 2147 | 2147 | |
| 2148 | - foreach($rows as $r) { |
|
| 2148 | + foreach ($rows as $r) { |
|
| 2149 | 2149 | $result |= $this->readUrl($r['qid']); |
| 2150 | 2150 | |
| 2151 | 2151 | $counter++; |
| 2152 | - usleep(intval($sleepTime)); // Just to relax the system |
|
| 2152 | + usleep(intval($sleepTime)); // Just to relax the system |
|
| 2153 | 2153 | |
| 2154 | 2154 | // if during the start and the current read url the cli has been disable we need to return from the function |
| 2155 | 2155 | // mark the process NOT as ended. |
| 2156 | 2156 | if ($this->getDisabled()) { |
| 2157 | - return ( $result | self::CLI_STATUS_ABORTED ); |
|
| 2157 | + return ($result | self::CLI_STATUS_ABORTED); |
|
| 2158 | 2158 | } |
| 2159 | 2159 | |
| 2160 | 2160 | if (!$this->CLI_checkIfProcessIsActive($this->CLI_buildProcessId())) { |
@@ -2162,7 +2162,7 @@ discard block |
||
| 2162 | 2162 | |
| 2163 | 2163 | //TODO might need an additional returncode |
| 2164 | 2164 | $result |= self::CLI_STATUS_ABORTED; |
| 2165 | - break; //possible timeout |
|
| 2165 | + break; //possible timeout |
|
| 2166 | 2166 | } |
| 2167 | 2167 | } |
| 2168 | 2168 | |
@@ -2175,7 +2175,7 @@ discard block |
||
| 2175 | 2175 | $this->CLI_debug("Nothing within queue which needs to be processed (".$this->CLI_buildProcessId().")"); |
| 2176 | 2176 | } |
| 2177 | 2177 | |
| 2178 | - if($counter > 0) { |
|
| 2178 | + if ($counter > 0) { |
|
| 2179 | 2179 | $result |= self::CLI_STATUS_PROCESSED; |
| 2180 | 2180 | } |
| 2181 | 2181 | |
@@ -2187,12 +2187,12 @@ discard block |
||
| 2187 | 2187 | * |
| 2188 | 2188 | * @return void |
| 2189 | 2189 | */ |
| 2190 | - function CLI_runHooks() { |
|
| 2190 | + function CLI_runHooks() { |
|
| 2191 | 2191 | global $TYPO3_CONF_VARS; |
| 2192 | - if (is_array($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'])) { |
|
| 2193 | - foreach($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'] as $objRef) { |
|
| 2192 | + if (is_array($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'])) { |
|
| 2193 | + foreach ($TYPO3_CONF_VARS['EXTCONF']['crawler']['cli_hooks'] as $objRef) { |
|
| 2194 | 2194 | $hookObj = &\TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($objRef); |
| 2195 | - if (is_object($hookObj)) { |
|
| 2195 | + if (is_object($hookObj)) { |
|
| 2196 | 2196 | $hookObj->crawler_init($this); |
| 2197 | 2197 | } |
| 2198 | 2198 | } |
@@ -2229,7 +2229,7 @@ discard block |
||
| 2229 | 2229 | |
| 2230 | 2230 | $currentTime = $this->getCurrentTime(); |
| 2231 | 2231 | |
| 2232 | - while($row = $this->db->sql_fetch_assoc($res)) { |
|
| 2232 | + while ($row = $this->db->sql_fetch_assoc($res)) { |
|
| 2233 | 2233 | if ($row['ttl'] < $currentTime) { |
| 2234 | 2234 | $orphanProcesses[] = $row['process_id']; |
| 2235 | 2235 | } else { |
@@ -2239,7 +2239,7 @@ discard block |
||
| 2239 | 2239 | |
| 2240 | 2240 | // if there are less than allowed active processes then add a new one |
| 2241 | 2241 | if ($processCount < intval($this->extensionSettings['processLimit'])) { |
| 2242 | - $this->CLI_debug("add process ".$this->CLI_buildProcessId()." (".($processCount+1)."/".intval($this->extensionSettings['processLimit']).")"); |
|
| 2242 | + $this->CLI_debug("add process ".$this->CLI_buildProcessId()." (".($processCount + 1)."/".intval($this->extensionSettings['processLimit']).")"); |
|
| 2243 | 2243 | |
| 2244 | 2244 | // create new process record |
| 2245 | 2245 | $this->db->exec_INSERTquery( |
@@ -2272,17 +2272,17 @@ discard block |
||
| 2272 | 2272 | * @param boolean $withinLock show whether the DB-actions are included within an existing lock |
| 2273 | 2273 | * @return boolean |
| 2274 | 2274 | */ |
| 2275 | - function CLI_releaseProcesses($releaseIds, $withinLock=false) { |
|
| 2275 | + function CLI_releaseProcesses($releaseIds, $withinLock = false) { |
|
| 2276 | 2276 | |
| 2277 | 2277 | if (!is_array($releaseIds)) { |
| 2278 | 2278 | $releaseIds = array($releaseIds); |
| 2279 | 2279 | } |
| 2280 | 2280 | |
| 2281 | 2281 | if (!count($releaseIds) > 0) { |
| 2282 | - return false; //nothing to release |
|
| 2282 | + return false; //nothing to release |
|
| 2283 | 2283 | } |
| 2284 | 2284 | |
| 2285 | - if(!$withinLock) $this->db->sql_query('BEGIN'); |
|
| 2285 | + if (!$withinLock) $this->db->sql_query('BEGIN'); |
|
| 2286 | 2286 | |
| 2287 | 2287 | // some kind of 2nd chance algo - this way you need at least 2 processes to have a real cleanup |
| 2288 | 2288 | // this ensures that a single process can't mess up the entire process table |
@@ -2312,21 +2312,21 @@ discard block |
||
| 2312 | 2312 | // mark all requested processes as non-active |
| 2313 | 2313 | $this->db->exec_UPDATEquery( |
| 2314 | 2314 | 'tx_crawler_process', |
| 2315 | - 'process_id IN (\''.implode('\',\'',$releaseIds).'\') AND deleted=0', |
|
| 2315 | + 'process_id IN (\''.implode('\',\'', $releaseIds).'\') AND deleted=0', |
|
| 2316 | 2316 | array( |
| 2317 | 2317 | 'active'=>'0' |
| 2318 | 2318 | ) |
| 2319 | 2319 | ); |
| 2320 | 2320 | $this->db->exec_UPDATEquery( |
| 2321 | 2321 | 'tx_crawler_queue', |
| 2322 | - 'exec_time=0 AND process_id IN ("'.implode('","',$releaseIds).'")', |
|
| 2322 | + 'exec_time=0 AND process_id IN ("'.implode('","', $releaseIds).'")', |
|
| 2323 | 2323 | array( |
| 2324 | 2324 | 'process_scheduled'=>0, |
| 2325 | 2325 | 'process_id'=>'' |
| 2326 | 2326 | ) |
| 2327 | 2327 | ); |
| 2328 | 2328 | |
| 2329 | - if(!$withinLock) $this->db->sql_query('COMMIT'); |
|
| 2329 | + if (!$withinLock) $this->db->sql_query('COMMIT'); |
|
| 2330 | 2330 | |
| 2331 | 2331 | return true; |
| 2332 | 2332 | } |
@@ -2354,13 +2354,13 @@ discard block |
||
| 2354 | 2354 | $this->db->sql_query('BEGIN'); |
| 2355 | 2355 | $res = $this->db->exec_SELECTquery( |
| 2356 | 2356 | 'process_id,active,ttl', |
| 2357 | - 'tx_crawler_process','process_id = \''.$pid.'\' AND deleted=0', |
|
| 2357 | + 'tx_crawler_process', 'process_id = \''.$pid.'\' AND deleted=0', |
|
| 2358 | 2358 | '', |
| 2359 | 2359 | 'ttl', |
| 2360 | 2360 | '0,1' |
| 2361 | 2361 | ); |
| 2362 | - if($row = $this->db->sql_fetch_assoc($res)) { |
|
| 2363 | - $ret = intVal($row['active'])==1; |
|
| 2362 | + if ($row = $this->db->sql_fetch_assoc($res)) { |
|
| 2363 | + $ret = intVal($row['active']) == 1; |
|
| 2364 | 2364 | } |
| 2365 | 2365 | $this->db->sql_query('COMMIT'); |
| 2366 | 2366 | |
@@ -2373,8 +2373,8 @@ discard block |
||
| 2373 | 2373 | * @return string the ID |
| 2374 | 2374 | */ |
| 2375 | 2375 | protected function CLI_buildProcessId() { |
| 2376 | - if(!$this->processID) { |
|
| 2377 | - $this->processID= \TYPO3\CMS\Core\Utility\GeneralUtility::shortMD5($this->microtime(true)); |
|
| 2376 | + if (!$this->processID) { |
|
| 2377 | + $this->processID = \TYPO3\CMS\Core\Utility\GeneralUtility::shortMD5($this->microtime(true)); |
|
| 2378 | 2378 | } |
| 2379 | 2379 | return $this->processID; |
| 2380 | 2380 | } |
@@ -2386,7 +2386,7 @@ discard block |
||
| 2386 | 2386 | * |
| 2387 | 2387 | * @codeCoverageIgnore |
| 2388 | 2388 | */ |
| 2389 | - protected function microtime($get_as_float = false ) |
|
| 2389 | + protected function microtime($get_as_float = false) |
|
| 2390 | 2390 | { |
| 2391 | 2391 | return microtime($get_as_float); |
| 2392 | 2392 | } |
@@ -2399,7 +2399,7 @@ discard block |
||
| 2399 | 2399 | * @codeCoverageIgnore |
| 2400 | 2400 | */ |
| 2401 | 2401 | function CLI_debug($msg) { |
| 2402 | - if(intval($this->extensionSettings['processDebug'])) { |
|
| 2402 | + if (intval($this->extensionSettings['processDebug'])) { |
|
| 2403 | 2403 | echo $msg."\n"; flush(); |
| 2404 | 2404 | } |
| 2405 | 2405 | } |
@@ -2418,7 +2418,7 @@ discard block |
||
| 2418 | 2418 | |
| 2419 | 2419 | $cmd = escapeshellcmd($this->extensionSettings['phpPath']); |
| 2420 | 2420 | $cmd .= ' '; |
| 2421 | - $cmd .= escapeshellarg(\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler') . 'cli/bootstrap.php'); |
|
| 2421 | + $cmd .= escapeshellarg(\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::extPath('crawler').'cli/bootstrap.php'); |
|
| 2422 | 2422 | $cmd .= ' '; |
| 2423 | 2423 | $cmd .= escapeshellarg($this->getFrontendBasePath()); |
| 2424 | 2424 | $cmd .= ' '; |
@@ -2428,10 +2428,10 @@ discard block |
||
| 2428 | 2428 | |
| 2429 | 2429 | $startTime = microtime(true); |
| 2430 | 2430 | $content = $this->executeShellCommand($cmd); |
| 2431 | - $this->log($url . (microtime(true) - $startTime)); |
|
| 2431 | + $this->log($url.(microtime(true) - $startTime)); |
|
| 2432 | 2432 | |
| 2433 | 2433 | $result = array( |
| 2434 | - 'request' => implode("\r\n", $requestHeaders) . "\r\n\r\n", |
|
| 2434 | + 'request' => implode("\r\n", $requestHeaders)."\r\n\r\n", |
|
| 2435 | 2435 | 'headers' => '', |
| 2436 | 2436 | 'content' => $content |
| 2437 | 2437 | ); |
@@ -2451,7 +2451,7 @@ discard block |
||
| 2451 | 2451 | $scheduledAgeInSeconds = $this->extensionSettings['cleanUpScheduledAge'] * 86400; |
| 2452 | 2452 | |
| 2453 | 2453 | $now = time(); |
| 2454 | - $condition = '(exec_time<>0 AND exec_time<' . ($now - $processedAgeInSeconds) . ') OR scheduled<=' . ($now - $scheduledAgeInSeconds); |
|
| 2454 | + $condition = '(exec_time<>0 AND exec_time<'.($now - $processedAgeInSeconds).') OR scheduled<='.($now - $scheduledAgeInSeconds); |
|
| 2455 | 2455 | $this->flushQueue($condition); |
| 2456 | 2456 | } |
| 2457 | 2457 | |
@@ -2472,7 +2472,7 @@ discard block |
||
| 2472 | 2472 | $GLOBALS['TT']->start(); |
| 2473 | 2473 | } |
| 2474 | 2474 | |
| 2475 | - $GLOBALS['TSFE'] = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Controller\\TypoScriptFrontendController', $GLOBALS['TYPO3_CONF_VARS'], $id, $typeNum); |
|
| 2475 | + $GLOBALS['TSFE'] = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Controller\\TypoScriptFrontendController', $GLOBALS['TYPO3_CONF_VARS'], $id, $typeNum); |
|
| 2476 | 2476 | $GLOBALS['TSFE']->sys_page = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\Page\\PageRepository'); |
| 2477 | 2477 | $GLOBALS['TSFE']->sys_page->init(TRUE); |
| 2478 | 2478 | $GLOBALS['TSFE']->connectToDB(); |
@@ -2485,6 +2485,6 @@ discard block |
||
| 2485 | 2485 | } |
| 2486 | 2486 | } |
| 2487 | 2487 | |
| 2488 | -if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']) { |
|
| 2488 | +if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']) { |
|
| 2489 | 2489 | include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/class.tx_crawler_lib.php']); |
| 2490 | 2490 | } |