| @@ -1,5 +1,7 @@ | ||
| 1 | 1 | <?php | 
| 2 | -if (!defined('TYPO3_cliMode'))	die('You cannot run this script directly!'); | |
| 2 | +if (!defined('TYPO3_cliMode')) { | |
| 3 | +    die('You cannot run this script directly!'); | |
| 4 | +} | |
| 3 | 5 | |
| 4 | 6 |  $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); | 
| 5 | 7 | $crawlerObj->CLI_main_im($_SERVER["argv"]); | 
| @@ -37,29 +37,29 @@ | ||
| 37 | 37 | */ | 
| 38 | 38 |  class tx_crawler_cli extends \TYPO3\CMS\Core\Controller\CommandLineController {
 | 
| 39 | 39 | |
| 40 | - /** | |
| 41 | - * Constructor | |
| 42 | - * | |
| 43 | - * @return void | |
| 44 | - */ | |
| 45 | -	function __construct() {
 | |
| 46 | - parent::__construct(); | |
| 40 | + /** | |
| 41 | + * Constructor | |
| 42 | + * | |
| 43 | + * @return void | |
| 44 | + */ | |
| 45 | +    function __construct() {
 | |
| 46 | + parent::__construct(); | |
| 47 | 47 | |
| 48 | -		$this->cli_options[] = array('-h', 'Show the help', '');
 | |
| 49 | -		$this->cli_options[] = array('--help', 'Same as -h', '');
 | |
| 50 | -		$this->cli_options[] = array('--countInARun count', 'Amount of pages', 'How many pages should be crawled during that run.');
 | |
| 51 | -		$this->cli_options[] = array('--sleepTime milliseconds', 'Millisecounds to relax system during crawls', 'Amount of millisecounds which the system should use to relax between crawls.');
 | |
| 52 | -		$this->cli_options[] = array('--sleepAfterFinish seconds', 'Secounds to relax system after all crawls.', 'Amount of secounds which the system should use to relax after all crawls are done.');
 | |
| 48 | +        $this->cli_options[] = array('-h', 'Show the help', '');
 | |
| 49 | +        $this->cli_options[] = array('--help', 'Same as -h', '');
 | |
| 50 | +        $this->cli_options[] = array('--countInARun count', 'Amount of pages', 'How many pages should be crawled during that run.');
 | |
| 51 | +        $this->cli_options[] = array('--sleepTime milliseconds', 'Millisecounds to relax system during crawls', 'Amount of millisecounds which the system should use to relax between crawls.');
 | |
| 52 | +        $this->cli_options[] = array('--sleepAfterFinish seconds', 'Secounds to relax system after all crawls.', 'Amount of secounds which the system should use to relax after all crawls are done.');
 | |
| 53 | 53 | |
| 54 | - // Setting help texts: | |
| 55 | - $this->cli_help['name'] = 'crawler CLI interface -- Crawling the URLs from the queue'; | |
| 56 | - $this->cli_help['synopsis'] = '###OPTIONS###'; | |
| 57 | - $this->cli_help['description'] = ""; | |
| 58 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler\nWill trigger the crawler which starts to process the queue entires\n"; | |
| 59 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2010'; | |
| 60 | - } | |
| 54 | + // Setting help texts: | |
| 55 | + $this->cli_help['name'] = 'crawler CLI interface -- Crawling the URLs from the queue'; | |
| 56 | + $this->cli_help['synopsis'] = '###OPTIONS###'; | |
| 57 | + $this->cli_help['description'] = ""; | |
| 58 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler\nWill trigger the crawler which starts to process the queue entires\n"; | |
| 59 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2010'; | |
| 60 | + } | |
| 61 | 61 | } | 
| 62 | 62 | |
| 63 | 63 |  if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']) {
 | 
| 64 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']); | |
| 64 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']); | |
| 65 | 65 | } | 
| @@ -37,41 +37,41 @@ | ||
| 37 | 37 | */ | 
| 38 | 38 |  class tx_crawler_cli_im extends \TYPO3\CMS\Core\Controller\CommandLineController {
 | 
| 39 | 39 | |
| 40 | - /** | |
| 41 | - * Constructor | |
| 42 | - * | |
| 43 | - * @return void | |
| 44 | - */ | |
| 45 | -	function __construct() {
 | |
| 46 | - parent::__construct(); | |
| 40 | + /** | |
| 41 | + * Constructor | |
| 42 | + * | |
| 43 | + * @return void | |
| 44 | + */ | |
| 45 | +    function __construct() {
 | |
| 46 | + parent::__construct(); | |
| 47 | 47 | |
| 48 | - // Adding options to help archive: | |
| 49 | - /** | |
| 50 | - * We removed the "proc" option as it seemd not to be working any more. But as the complete handling of the crawler has changed regarding the configuration | |
| 51 | - * this is completely ok. Since configuration records were introduced to configure "what should be done" additionally to page ts the way to setup jobs | |
| 52 | - * has drifted from selecting filtering processing instructions to selecting/filtering configuration keys (you can configure the processing instructions there). | |
| 53 | - * This is also reflected in the backend modules and allows you a much clearer and powerful way to work with the crawler extension. | |
| 54 | - */ | |
| 55 | -		// $this->cli_options[] = array('-proc listOfProcInstr', 'Comma list of processing instructions. These are the "actions" carried out when crawling and you must specify at least one. Depends on third-party extensions. Examples are "tx_cachemgm_recache" from "cachemgm" extension (will recache pages), "tx_staticpub_publish" from "staticpub" (publishing pages to static files) or "tx_indexedsearch_reindex" from "indexed_search" (indexes pages).');
 | |
| 56 | - // TODO: cleanup here! | |
| 57 | -		$this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include.");
 | |
| 58 | -		$this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!");
 | |
| 59 | -		$this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"');
 | |
| 60 | -		$this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations');
 | |
| 61 | -		#		$this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n  0 = all output\n  1 = info and greater (default)\n  2 = warnings and greater\n  3 = errors");
 | |
| 48 | + // Adding options to help archive: | |
| 49 | + /** | |
| 50 | + * We removed the "proc" option as it seemd not to be working any more. But as the complete handling of the crawler has changed regarding the configuration | |
| 51 | + * this is completely ok. Since configuration records were introduced to configure "what should be done" additionally to page ts the way to setup jobs | |
| 52 | + * has drifted from selecting filtering processing instructions to selecting/filtering configuration keys (you can configure the processing instructions there). | |
| 53 | + * This is also reflected in the backend modules and allows you a much clearer and powerful way to work with the crawler extension. | |
| 54 | + */ | |
| 55 | +        // $this->cli_options[] = array('-proc listOfProcInstr', 'Comma list of processing instructions. These are the "actions" carried out when crawling and you must specify at least one. Depends on third-party extensions. Examples are "tx_cachemgm_recache" from "cachemgm" extension (will recache pages), "tx_staticpub_publish" from "staticpub" (publishing pages to static files) or "tx_indexedsearch_reindex" from "indexed_search" (indexes pages).');
 | |
| 56 | + // TODO: cleanup here! | |
| 57 | +        $this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include.");
 | |
| 58 | +        $this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!");
 | |
| 59 | +        $this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"');
 | |
| 60 | +        $this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations');
 | |
| 61 | +        #		$this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n  0 = all output\n  1 = info and greater (default)\n  2 = warnings and greater\n  3 = errors");
 | |
| 62 | 62 | |
| 63 | - // Setting help texts: | |
| 64 | - $this->cli_help['name'] = 'crawler CLI interface -- Submitting URLs to be crawled via CLI interface.'; | |
| 65 | - $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; | |
| 66 | - $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; | |
| 67 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; | |
| 68 | - $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; | |
| 69 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; | |
| 70 | - } | |
| 63 | + // Setting help texts: | |
| 64 | + $this->cli_help['name'] = 'crawler CLI interface -- Submitting URLs to be crawled via CLI interface.'; | |
| 65 | + $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; | |
| 66 | + $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; | |
| 67 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; | |
| 68 | + $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; | |
| 69 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; | |
| 70 | + } | |
| 71 | 71 | } | 
| 72 | 72 | |
| 73 | 73 |  if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']) {
 | 
| 74 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']); | |
| 74 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']); | |
| 75 | 75 | } | 
| 76 | 76 | |
| 77 | 77 | ?> | 
| @@ -57,7 +57,7 @@ discard block | ||
| 57 | 57 |  		$this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include.");
 | 
| 58 | 58 |  		$this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!");
 | 
| 59 | 59 |  		$this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"');
 | 
| 60 | -		$this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations');
 | |
| 60 | +		$this->cli_options[] = array('-conf configurationkeys', 'List of Configuration Keys', 'A commaseperated list of crawler configurations');
 | |
| 61 | 61 |  		#		$this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n  0 = all output\n  1 = info and greater (default)\n  2 = warnings and greater\n  3 = errors");
 | 
| 62 | 62 | |
| 63 | 63 | // Setting help texts: | 
| @@ -65,7 +65,7 @@ discard block | ||
| 65 | 65 | $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; | 
| 66 | 66 | $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; | 
| 67 | 67 | $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; | 
| 68 | - $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; | |
| 68 | + $this->cli_help['examples'] .= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; | |
| 69 | 69 | $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; | 
| 70 | 70 | } | 
| 71 | 71 | } | 
| @@ -1,5 +1,7 @@ | ||
| 1 | 1 | <?php | 
| 2 | -if (!defined('TYPO3_cliMode'))	die('You cannot run this script directly!'); | |
| 2 | +if (!defined('TYPO3_cliMode')) { | |
| 3 | +    die('You cannot run this script directly!'); | |
| 4 | +} | |
| 3 | 5 | |
| 4 | 6 |  $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); | 
| 5 | 7 | $crawlerObj->CLI_main_flush($_SERVER["argv"]); | 
| @@ -5,20 +5,20 @@ discard block | ||
| 5 | 5 | * Retrieve path (taken from cli_dispatch.phpsh) | 
| 6 | 6 | */ | 
| 7 | 7 | |
| 8 | - // Get path to this script | |
| 8 | + // Get path to this script | |
| 9 | 9 | $tempPathThisScript = isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : (isset($_ENV['_']) ? $_ENV['_'] : $_SERVER['_']); | 
| 10 | 10 | |
| 11 | - // Resolve path | |
| 11 | + // Resolve path | |
| 12 | 12 |  if (!isAbsPath($tempPathThisScript)) { | 
| 13 | - $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); | |
| 14 | -	if ($workingDirectory) { | |
| 15 | -		$tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); | |
| 16 | -		if (!@is_file($tempPathThisScript)) { | |
| 17 | -			die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); | |
| 18 | - } | |
| 19 | -	} else { | |
| 20 | -		die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); | |
| 21 | - } | |
| 13 | + $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); | |
| 14 | +    if ($workingDirectory) { | |
| 15 | +        $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); | |
| 16 | +        if (!@is_file($tempPathThisScript)) { | |
| 17 | +            die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); | |
| 18 | + } | |
| 19 | +    } else { | |
| 20 | +        die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); | |
| 21 | + } | |
| 22 | 22 | } | 
| 23 | 23 | |
| 24 | 24 |  $typo3Root = preg_replace('#typo3conf/ext/crawler/cli/bootstrap.php$#', '', $tempPathThisScript); | 
| @@ -30,33 +30,33 @@ discard block | ||
| 30 | 30 | */ | 
| 31 | 31 | $additionalHeaders = unserialize(base64_decode($_SERVER['argv'][3])); | 
| 32 | 32 |  if (is_array($additionalHeaders)) { | 
| 33 | -	foreach ($additionalHeaders as $additionalHeader) { | |
| 34 | -		if (strpos($additionalHeader, ':') !== FALSE) { | |
| 35 | -			list($key, $value) = explode(':', $additionalHeader, 2); | |
| 36 | -			$key = str_replace('-', '_', strtoupper(trim($key))); | |
| 37 | -			if ($key != 'HOST') { | |
| 38 | - $_SERVER['HTTP_' . $key] = $value; | |
| 39 | - } | |
| 40 | - } | |
| 41 | - } | |
| 33 | +    foreach ($additionalHeaders as $additionalHeader) { | |
| 34 | +        if (strpos($additionalHeader, ':') !== FALSE) { | |
| 35 | +            list($key, $value) = explode(':', $additionalHeader, 2); | |
| 36 | +            $key = str_replace('-', '_', strtoupper(trim($key))); | |
| 37 | +            if ($key != 'HOST') { | |
| 38 | + $_SERVER['HTTP_' . $key] = $value; | |
| 39 | + } | |
| 40 | + } | |
| 41 | + } | |
| 42 | 42 | } | 
| 43 | 43 | |
| 44 | 44 | |
| 45 | - // put parsed query parts into $_GET array | |
| 45 | + // put parsed query parts into $_GET array | |
| 46 | 46 | $urlParts = parse_url($_SERVER['argv'][2]); | 
| 47 | - // Populating $_GET | |
| 47 | + // Populating $_GET | |
| 48 | 48 | parse_str($urlParts['query'], $_GET); | 
| 49 | - // Populating $_REQUEST | |
| 49 | + // Populating $_REQUEST | |
| 50 | 50 | parse_str($urlParts['query'], $_REQUEST); | 
| 51 | - // Populating $_POST | |
| 51 | + // Populating $_POST | |
| 52 | 52 | $_POST = array(); | 
| 53 | - // Populating $_COOKIE | |
| 53 | + // Populating $_COOKIE | |
| 54 | 54 | $_COOKIE = array(); | 
| 55 | 55 | |
| 56 | - // Get the TYPO3_SITE_PATH of the website frontend: | |
| 56 | + // Get the TYPO3_SITE_PATH of the website frontend: | |
| 57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; | 
| 58 | 58 | |
| 59 | - // faking the environment | |
| 59 | + // faking the environment | |
| 60 | 60 |  $_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); | 
| 61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; | 
| 62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; | 
| @@ -66,15 +66,15 @@ discard block | ||
| 66 | 66 | $_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); | 
| 67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; | 
| 68 | 68 | |
| 69 | - // Define a port if used in the URL: | |
| 69 | + // Define a port if used in the URL: | |
| 70 | 70 |  if (isset($urlParts['port'])) { | 
| 71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; | |
| 72 | - $_SERVER['SERVER_PORT'] = $urlParts['port']; | |
| 71 | + $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; | |
| 72 | + $_SERVER['SERVER_PORT'] = $urlParts['port']; | |
| 73 | 73 | } | 
| 74 | 74 | |
| 75 | - // Define HTTPS disposal: | |
| 75 | + // Define HTTPS disposal: | |
| 76 | 76 |  if ($urlParts['scheme'] === 'https') { | 
| 77 | - $_SERVER['HTTPS'] = 'on'; | |
| 77 | + $_SERVER['HTTPS'] = 'on'; | |
| 78 | 78 | } | 
| 79 | 79 | |
| 80 | 80 | chdir($typo3Root); | 
| @@ -88,11 +88,11 @@ discard block | ||
| 88 | 88 | * @return boolean | 
| 89 | 89 | */ | 
| 90 | 90 |  function isAbsPath($path) { | 
| 91 | - // on Windows also a path starting with a drive letter is absolute: X:/ | |
| 92 | -	if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { | |
| 93 | - return TRUE; | |
| 94 | - } | |
| 91 | + // on Windows also a path starting with a drive letter is absolute: X:/ | |
| 92 | +    if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { | |
| 93 | + return TRUE; | |
| 94 | + } | |
| 95 | 95 | |
| 96 | - // path starting with a / is always absolute, on every system | |
| 97 | - return (substr($path, 0, 1) === '/'); | |
| 96 | + // path starting with a / is always absolute, on every system | |
| 97 | + return (substr($path, 0, 1) === '/'); | |
| 98 | 98 | } | 
| @@ -12,12 +12,12 @@ discard block | ||
| 12 | 12 |  if (!isAbsPath($tempPathThisScript)) { | 
| 13 | 13 | $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); | 
| 14 | 14 |  	if ($workingDirectory) { | 
| 15 | -		$tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); | |
| 15 | +		$tempPathThisScript = $workingDirectory.'/'.preg_replace('/\.\//', '', $tempPathThisScript); | |
| 16 | 16 |  		if (!@is_file($tempPathThisScript)) { | 
| 17 | -			die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); | |
| 17 | +			die('Relative path found, but an error occured during resolving of the absolute path: '.$tempPathThisScript.PHP_EOL); | |
| 18 | 18 | } | 
| 19 | 19 |  	} else { | 
| 20 | -		die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); | |
| 20 | +		die('Relative path found, but resolving absolute path is not supported on this platform.'.PHP_EOL); | |
| 21 | 21 | } | 
| 22 | 22 | } | 
| 23 | 23 | |
| @@ -35,7 +35,7 @@ discard block | ||
| 35 | 35 |  			list($key, $value) = explode(':', $additionalHeader, 2); | 
| 36 | 36 |  			$key = str_replace('-', '_', strtoupper(trim($key))); | 
| 37 | 37 |  			if ($key != 'HOST') { | 
| 38 | - $_SERVER['HTTP_' . $key] = $value; | |
| 38 | + $_SERVER['HTTP_'.$key] = $value; | |
| 39 | 39 | } | 
| 40 | 40 | } | 
| 41 | 41 | } | 
| @@ -57,18 +57,18 @@ discard block | ||
| 57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; | 
| 58 | 58 | |
| 59 | 59 | // faking the environment | 
| 60 | -$_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); | |
| 60 | +$_SERVER['DOCUMENT_ROOT'] = preg_replace('#'.preg_quote($typo3SitePath, '#').'$#', '', $typo3Root); | |
| 61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; | 
| 62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; | 
| 63 | -$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath . 'index.php'; | |
| 64 | -$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root . 'index.php'; | |
| 63 | +$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath.'index.php'; | |
| 64 | +$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root.'index.php'; | |
| 65 | 65 | $_SERVER['QUERY_STRING'] = (isset($urlParts['query']) ? $urlParts['query'] : ''); | 
| 66 | -$_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); | |
| 66 | +$_SERVER['REQUEST_URI'] = $urlParts['path'].(isset($urlParts['query']) ? '?'.$urlParts['query'] : ''); | |
| 67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; | 
| 68 | 68 | |
| 69 | 69 | // Define a port if used in the URL: | 
| 70 | 70 |  if (isset($urlParts['port'])) { | 
| 71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; | |
| 71 | + $_SERVER['HTTP_HOST'] .= ':'.$urlParts['port']; | |
| 72 | 72 | $_SERVER['SERVER_PORT'] = $urlParts['port']; | 
| 73 | 73 | } | 
| 74 | 74 | |
| @@ -78,7 +78,7 @@ discard block | ||
| 78 | 78 | } | 
| 79 | 79 | |
| 80 | 80 | chdir($typo3Root); | 
| 81 | -include($typo3Root . '/index.php'); | |
| 81 | +include($typo3Root.'/index.php'); | |
| 82 | 82 | |
| 83 | 83 | |
| 84 | 84 | /** | 
| @@ -37,30 +37,30 @@ | ||
| 37 | 37 | */ | 
| 38 | 38 |  class tx_crawler_cli_flush extends \TYPO3\CMS\Core\Controller\CommandLineController { | 
| 39 | 39 | |
| 40 | - /** | |
| 41 | - * Constructor | |
| 42 | - * | |
| 43 | - * @return void | |
| 44 | - */ | |
| 45 | -	function __construct() { | |
| 46 | - parent::__construct(); | |
| 40 | + /** | |
| 41 | + * Constructor | |
| 42 | + * | |
| 43 | + * @return void | |
| 44 | + */ | |
| 45 | +    function __construct() { | |
| 46 | + parent::__construct(); | |
| 47 | 47 | |
| 48 | - // Adding options to help archive: | |
| 49 | -		$this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); | |
| 50 | -		#		$this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n  0 = all output\n  1 = info and greater (default)\n  2 = warnings and greater\n  3 = errors"); | |
| 48 | + // Adding options to help archive: | |
| 49 | +        $this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); | |
| 50 | +        #		$this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n  0 = all output\n  1 = info and greater (default)\n  2 = warnings and greater\n  3 = errors"); | |
| 51 | 51 | |
| 52 | - // Setting help texts: | |
| 53 | - $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; | |
| 54 | - $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; | |
| 55 | - $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; | |
| 56 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; | |
| 57 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; | |
| 58 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; | |
| 59 | - } | |
| 52 | + // Setting help texts: | |
| 53 | + $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; | |
| 54 | + $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; | |
| 55 | + $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; | |
| 56 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; | |
| 57 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; | |
| 58 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; | |
| 59 | + } | |
| 60 | 60 | } | 
| 61 | 61 | |
| 62 | 62 |  if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']) { | 
| 63 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); | |
| 63 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); | |
| 64 | 64 | } | 
| 65 | 65 | |
| 66 | 66 | ?> | 
| @@ -24,43 +24,43 @@ | ||
| 24 | 24 | |
| 25 | 25 |  abstract class tx_crawler_domain_lib_abstract_dbobject { | 
| 26 | 26 | |
| 27 | - /** | |
| 28 | - * @var array | |
| 29 | - */ | |
| 30 | - protected $row; | |
| 27 | + /** | |
| 28 | + * @var array | |
| 29 | + */ | |
| 30 | + protected $row; | |
| 31 | 31 | |
| 32 | - /** | |
| 33 | - * @var string table name | |
| 34 | - */ | |
| 35 | - protected static $tableName; | |
| 32 | + /** | |
| 33 | + * @var string table name | |
| 34 | + */ | |
| 35 | + protected static $tableName; | |
| 36 | 36 | |
| 37 | - /** | |
| 38 | - * Constructor | |
| 39 | - * | |
| 40 | - * @param array $row optional array with propertys | |
| 41 | - */ | |
| 42 | -	public function __construct($row = array()) { | |
| 43 | - $this->row = $row; | |
| 44 | - } | |
| 37 | + /** | |
| 38 | + * Constructor | |
| 39 | + * | |
| 40 | + * @param array $row optional array with propertys | |
| 41 | + */ | |
| 42 | +    public function __construct($row = array()) { | |
| 43 | + $this->row = $row; | |
| 44 | + } | |
| 45 | 45 | |
| 46 | - /** | |
| 47 | - * Get table name | |
| 48 | - * | |
| 49 | - * @param void | |
| 50 | - * @return string table name | |
| 51 | - */ | |
| 52 | -	public static function getTableName(){ | |
| 53 | - return self::$tableName; | |
| 54 | - } | |
| 46 | + /** | |
| 47 | + * Get table name | |
| 48 | + * | |
| 49 | + * @param void | |
| 50 | + * @return string table name | |
| 51 | + */ | |
| 52 | +    public static function getTableName(){ | |
| 53 | + return self::$tableName; | |
| 54 | + } | |
| 55 | 55 | |
| 56 | - /** | |
| 57 | - * Returns the propertys of the object as array | |
| 58 | - * | |
| 59 | - * @return array | |
| 60 | - */ | |
| 61 | -	public function getRow() { | |
| 62 | - return $this->row; | |
| 63 | - } | |
| 56 | + /** | |
| 57 | + * Returns the propertys of the object as array | |
| 58 | + * | |
| 59 | + * @return array | |
| 60 | + */ | |
| 61 | +    public function getRow() { | |
| 62 | + return $this->row; | |
| 63 | + } | |
| 64 | 64 | |
| 65 | 65 | |
| 66 | 66 | } | 
| @@ -49,7 +49,7 @@ | ||
| 49 | 49 | * @param void | 
| 50 | 50 | * @return string table name | 
| 51 | 51 | */ | 
| 52 | -	public static function getTableName(){ | |
| 52 | +	public static function getTableName() { | |
| 53 | 53 | return self::$tableName; | 
| 54 | 54 | } | 
| 55 | 55 | |
| @@ -41,62 +41,62 @@ | ||
| 41 | 41 | */ | 
| 42 | 42 |  class tx_crawler_domain_process_collection extends ArrayObject { | 
| 43 | 43 | |
| 44 | - /** | |
| 45 | - * Method to retrieve an element from the collection. | |
| 46 | - * @access public | |
| 47 | - * @throws Exception | |
| 48 | - * @return tx_crawler_domain_process | |
| 49 | - */ | |
| 50 | -	public function offsetGet($index) { | |
| 51 | -		if (! parent::offsetExists($index)) { | |
| 52 | -			throw new Exception('Index "' . var_export($index, true) . '" for tx_crawler_domain_process are not available'); | |
| 53 | - } | |
| 54 | - return parent::offsetGet($index); | |
| 55 | - } | |
| 44 | + /** | |
| 45 | + * Method to retrieve an element from the collection. | |
| 46 | + * @access public | |
| 47 | + * @throws Exception | |
| 48 | + * @return tx_crawler_domain_process | |
| 49 | + */ | |
| 50 | +    public function offsetGet($index) { | |
| 51 | +        if (! parent::offsetExists($index)) { | |
| 52 | +            throw new Exception('Index "' . var_export($index, true) . '" for tx_crawler_domain_process are not available'); | |
| 53 | + } | |
| 54 | + return parent::offsetGet($index); | |
| 55 | + } | |
| 56 | 56 | |
| 57 | - /** | |
| 58 | - * Method to add an element to the collection- | |
| 59 | - * | |
| 60 | - * @param mixed $index | |
| 61 | - * @param tx_crawler_domain_process $subject | |
| 62 | - * @throws InvalidArgumentException | |
| 63 | - * @return void | |
| 64 | - */ | |
| 65 | -	public function offsetSet($index, $subject) { | |
| 66 | -		if (! $subject instanceof tx_crawler_domain_process ) { | |
| 67 | -			throw new InvalidArgumentException('Wrong parameter type given, "tx_crawler_domain_process" expected!'); | |
| 68 | - } | |
| 69 | - parent::offsetSet($index, $subject); | |
| 70 | - } | |
| 57 | + /** | |
| 58 | + * Method to add an element to the collection- | |
| 59 | + * | |
| 60 | + * @param mixed $index | |
| 61 | + * @param tx_crawler_domain_process $subject | |
| 62 | + * @throws InvalidArgumentException | |
| 63 | + * @return void | |
| 64 | + */ | |
| 65 | +    public function offsetSet($index, $subject) { | |
| 66 | +        if (! $subject instanceof tx_crawler_domain_process ) { | |
| 67 | +            throw new InvalidArgumentException('Wrong parameter type given, "tx_crawler_domain_process" expected!'); | |
| 68 | + } | |
| 69 | + parent::offsetSet($index, $subject); | |
| 70 | + } | |
| 71 | 71 | |
| 72 | - /** | |
| 73 | - * Method to append an element to the collection | |
| 74 | - * @param tx_crawler_domain_process $subject | |
| 75 | - * @throws InvalidArgumentException | |
| 76 | - * @return void | |
| 77 | - */ | |
| 78 | -	public function append($subject) { | |
| 79 | -		if (! $subject instanceof tx_crawler_domain_process ) { | |
| 80 | -			throw new InvalidArgumentException('Wrong parameter type given, "tx_crawler_domain_process" expected!'); | |
| 81 | - } | |
| 82 | - parent::append($subject); | |
| 83 | - } | |
| 72 | + /** | |
| 73 | + * Method to append an element to the collection | |
| 74 | + * @param tx_crawler_domain_process $subject | |
| 75 | + * @throws InvalidArgumentException | |
| 76 | + * @return void | |
| 77 | + */ | |
| 78 | +    public function append($subject) { | |
| 79 | +        if (! $subject instanceof tx_crawler_domain_process ) { | |
| 80 | +            throw new InvalidArgumentException('Wrong parameter type given, "tx_crawler_domain_process" expected!'); | |
| 81 | + } | |
| 82 | + parent::append($subject); | |
| 83 | + } | |
| 84 | 84 | |
| 85 | - /** | |
| 86 | - * returns array of process ids of the current collection | |
| 87 | - * @return array | |
| 88 | - */ | |
| 89 | -	public function getProcessIds() { | |
| 90 | - $result=array(); | |
| 91 | -		foreach ($this->getIterator() as $value) { | |
| 92 | - $result[]=$value->getProcess_id(); | |
| 93 | - } | |
| 94 | - return $result; | |
| 95 | - } | |
| 85 | + /** | |
| 86 | + * returns array of process ids of the current collection | |
| 87 | + * @return array | |
| 88 | + */ | |
| 89 | +    public function getProcessIds() { | |
| 90 | + $result=array(); | |
| 91 | +        foreach ($this->getIterator() as $value) { | |
| 92 | + $result[]=$value->getProcess_id(); | |
| 93 | + } | |
| 94 | + return $result; | |
| 95 | + } | |
| 96 | 96 | } | 
| 97 | 97 | |
| 98 | 98 | |
| 99 | 99 |  if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/domain/process/class.tx_crawler_domain_process_collection.php']) { | 
| 100 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/domain/process/class.tx_crawler_domain_process_collection.php']); | |
| 100 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/domain/process/class.tx_crawler_domain_process_collection.php']); | |
| 101 | 101 | } | 
| 102 | 102 | ?> | 
| 103 | 103 | \ No newline at end of file | 
| @@ -48,8 +48,8 @@ discard block | ||
| 48 | 48 | * @return tx_crawler_domain_process | 
| 49 | 49 | */ | 
| 50 | 50 |  	public function offsetGet($index) { | 
| 51 | -		if (! parent::offsetExists($index)) { | |
| 52 | -			throw new Exception('Index "' . var_export($index, true) . '" for tx_crawler_domain_process are not available'); | |
| 51 | +		if (!parent::offsetExists($index)) { | |
| 52 | +			throw new Exception('Index "'.var_export($index, true).'" for tx_crawler_domain_process are not available'); | |
| 53 | 53 | } | 
| 54 | 54 | return parent::offsetGet($index); | 
| 55 | 55 | } | 
| @@ -63,7 +63,7 @@ discard block | ||
| 63 | 63 | * @return void | 
| 64 | 64 | */ | 
| 65 | 65 |  	public function offsetSet($index, $subject) { | 
| 66 | -		if (! $subject instanceof tx_crawler_domain_process ) { | |
| 66 | +		if (!$subject instanceof tx_crawler_domain_process) { | |
| 67 | 67 |  			throw new InvalidArgumentException('Wrong parameter type given, "tx_crawler_domain_process" expected!'); | 
| 68 | 68 | } | 
| 69 | 69 | parent::offsetSet($index, $subject); | 
| @@ -76,7 +76,7 @@ discard block | ||
| 76 | 76 | * @return void | 
| 77 | 77 | */ | 
| 78 | 78 |  	public function append($subject) { | 
| 79 | -		if (! $subject instanceof tx_crawler_domain_process ) { | |
| 79 | +		if (!$subject instanceof tx_crawler_domain_process) { | |
| 80 | 80 |  			throw new InvalidArgumentException('Wrong parameter type given, "tx_crawler_domain_process" expected!'); | 
| 81 | 81 | } | 
| 82 | 82 | parent::append($subject); | 
| @@ -87,9 +87,9 @@ discard block | ||
| 87 | 87 | * @return array | 
| 88 | 88 | */ | 
| 89 | 89 |  	public function getProcessIds() { | 
| 90 | - $result=array(); | |
| 90 | + $result = array(); | |
| 91 | 91 |  		foreach ($this->getIterator() as $value) { | 
| 92 | - $result[]=$value->getProcess_id(); | |
| 92 | + $result[] = $value->getProcess_id(); | |
| 93 | 93 | } | 
| 94 | 94 | return $result; | 
| 95 | 95 | } | 
| @@ -24,14 +24,14 @@ | ||
| 24 | 24 | |
| 25 | 25 |  interface tx_crawler_domain_events_observer { | 
| 26 | 26 | |
| 27 | - /** | |
| 28 | - * This method should be implemented by the observer to register events | |
| 29 | - * that should be forwarded to the observer | |
| 30 | - * | |
| 31 | - * @param tx_crawler_domain_events_dispatcher $dispatcher | |
| 32 | - * @return boolean | |
| 33 | - */ | |
| 34 | - public function registerObservers(tx_crawler_domain_events_dispatcher $dispatcher); | |
| 27 | + /** | |
| 28 | + * This method should be implemented by the observer to register events | |
| 29 | + * that should be forwarded to the observer | |
| 30 | + * | |
| 31 | + * @param tx_crawler_domain_events_dispatcher $dispatcher | |
| 32 | + * @return boolean | |
| 33 | + */ | |
| 34 | + public function registerObservers(tx_crawler_domain_events_dispatcher $dispatcher); | |
| 35 | 35 | } | 
| 36 | 36 | |
| 37 | 37 | ?> | 
| 38 | 38 | \ No newline at end of file |