@@ -37,29 +37,29 @@ |
||
37 | 37 | */ |
38 | 38 | class tx_crawler_cli extends \TYPO3\CMS\Core\Controller\CommandLineController { |
39 | 39 | |
40 | - /** |
|
41 | - * Constructor |
|
42 | - * |
|
43 | - * @return void |
|
44 | - */ |
|
45 | - function __construct() { |
|
46 | - parent::__construct(); |
|
40 | + /** |
|
41 | + * Constructor |
|
42 | + * |
|
43 | + * @return void |
|
44 | + */ |
|
45 | + function __construct() { |
|
46 | + parent::__construct(); |
|
47 | 47 | |
48 | - $this->cli_options[] = array('-h', 'Show the help', ''); |
|
49 | - $this->cli_options[] = array('--help', 'Same as -h', ''); |
|
50 | - $this->cli_options[] = array('--countInARun count', 'Amount of pages', 'How many pages should be crawled during that run.'); |
|
51 | - $this->cli_options[] = array('--sleepTime milliseconds', 'Millisecounds to relax system during crawls', 'Amount of millisecounds which the system should use to relax between crawls.'); |
|
52 | - $this->cli_options[] = array('--sleepAfterFinish seconds', 'Secounds to relax system after all crawls.', 'Amount of secounds which the system should use to relax after all crawls are done.'); |
|
48 | + $this->cli_options[] = array('-h', 'Show the help', ''); |
|
49 | + $this->cli_options[] = array('--help', 'Same as -h', ''); |
|
50 | + $this->cli_options[] = array('--countInARun count', 'Amount of pages', 'How many pages should be crawled during that run.'); |
|
51 | + $this->cli_options[] = array('--sleepTime milliseconds', 'Millisecounds to relax system during crawls', 'Amount of millisecounds which the system should use to relax between crawls.'); |
|
52 | + $this->cli_options[] = array('--sleepAfterFinish seconds', 'Secounds to relax system after all crawls.', 'Amount of secounds which the system should use to relax after all crawls are done.'); |
|
53 | 53 | |
54 | - // Setting help texts: |
|
55 | - $this->cli_help['name'] = 'crawler CLI interface -- Crawling the URLs from the queue'; |
|
56 | - $this->cli_help['synopsis'] = '###OPTIONS###'; |
|
57 | - $this->cli_help['description'] = ""; |
|
58 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler\nWill trigger the crawler which starts to process the queue entires\n"; |
|
59 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2010'; |
|
60 | - } |
|
54 | + // Setting help texts: |
|
55 | + $this->cli_help['name'] = 'crawler CLI interface -- Crawling the URLs from the queue'; |
|
56 | + $this->cli_help['synopsis'] = '###OPTIONS###'; |
|
57 | + $this->cli_help['description'] = ""; |
|
58 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler\nWill trigger the crawler which starts to process the queue entires\n"; |
|
59 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2010'; |
|
60 | + } |
|
61 | 61 | } |
62 | 62 | |
63 | 63 | if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']) { |
64 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']); |
|
64 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli.php']); |
|
65 | 65 | } |
@@ -37,41 +37,41 @@ |
||
37 | 37 | */ |
38 | 38 | class tx_crawler_cli_im extends \TYPO3\CMS\Core\Controller\CommandLineController { |
39 | 39 | |
40 | - /** |
|
41 | - * Constructor |
|
42 | - * |
|
43 | - * @return void |
|
44 | - */ |
|
45 | - function __construct() { |
|
46 | - parent::__construct(); |
|
40 | + /** |
|
41 | + * Constructor |
|
42 | + * |
|
43 | + * @return void |
|
44 | + */ |
|
45 | + function __construct() { |
|
46 | + parent::__construct(); |
|
47 | 47 | |
48 | - // Adding options to help archive: |
|
49 | - /** |
|
50 | - * We removed the "proc" option as it seemd not to be working any more. But as the complete handling of the crawler has changed regarding the configuration |
|
51 | - * this is completely ok. Since configuration records were introduced to configure "what should be done" additionally to page ts the way to setup jobs |
|
52 | - * has drifted from selecting filtering processing instructions to selecting/filtering configuration keys (you can configure the processing instructions there). |
|
53 | - * This is also reflected in the backend modules and allows you a much clearer and powerful way to work with the crawler extension. |
|
54 | - */ |
|
55 | - // $this->cli_options[] = array('-proc listOfProcInstr', 'Comma list of processing instructions. These are the "actions" carried out when crawling and you must specify at least one. Depends on third-party extensions. Examples are "tx_cachemgm_recache" from "cachemgm" extension (will recache pages), "tx_staticpub_publish" from "staticpub" (publishing pages to static files) or "tx_indexedsearch_reindex" from "indexed_search" (indexes pages).'); |
|
56 | - // TODO: cleanup here! |
|
57 | - $this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include."); |
|
58 | - $this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!"); |
|
59 | - $this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"'); |
|
60 | - $this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations'); |
|
61 | - # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
48 | + // Adding options to help archive: |
|
49 | + /** |
|
50 | + * We removed the "proc" option as it seemd not to be working any more. But as the complete handling of the crawler has changed regarding the configuration |
|
51 | + * this is completely ok. Since configuration records were introduced to configure "what should be done" additionally to page ts the way to setup jobs |
|
52 | + * has drifted from selecting filtering processing instructions to selecting/filtering configuration keys (you can configure the processing instructions there). |
|
53 | + * This is also reflected in the backend modules and allows you a much clearer and powerful way to work with the crawler extension. |
|
54 | + */ |
|
55 | + // $this->cli_options[] = array('-proc listOfProcInstr', 'Comma list of processing instructions. These are the "actions" carried out when crawling and you must specify at least one. Depends on third-party extensions. Examples are "tx_cachemgm_recache" from "cachemgm" extension (will recache pages), "tx_staticpub_publish" from "staticpub" (publishing pages to static files) or "tx_indexedsearch_reindex" from "indexed_search" (indexes pages).'); |
|
56 | + // TODO: cleanup here! |
|
57 | + $this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include."); |
|
58 | + $this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!"); |
|
59 | + $this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"'); |
|
60 | + $this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations'); |
|
61 | + # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
62 | 62 | |
63 | - // Setting help texts: |
|
64 | - $this->cli_help['name'] = 'crawler CLI interface -- Submitting URLs to be crawled via CLI interface.'; |
|
65 | - $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
66 | - $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; |
|
67 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; |
|
68 | - $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
69 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
70 | - } |
|
63 | + // Setting help texts: |
|
64 | + $this->cli_help['name'] = 'crawler CLI interface -- Submitting URLs to be crawled via CLI interface.'; |
|
65 | + $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
66 | + $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; |
|
67 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; |
|
68 | + $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
69 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
70 | + } |
|
71 | 71 | } |
72 | 72 | |
73 | 73 | if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']) { |
74 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']); |
|
74 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']); |
|
75 | 75 | } |
76 | 76 | |
77 | 77 | ?> |
@@ -57,7 +57,7 @@ discard block |
||
57 | 57 | $this->cli_options[] = array('-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include."); |
58 | 58 | $this->cli_options[] = array('-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!"); |
59 | 59 | $this->cli_options[] = array('-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"'); |
60 | - $this->cli_options[] = array('-conf configurationkeys','List of Configuration Keys','A commaseperated list of crawler configurations'); |
|
60 | + $this->cli_options[] = array('-conf configurationkeys', 'List of Configuration Keys', 'A commaseperated list of crawler configurations'); |
|
61 | 61 | # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
62 | 62 | |
63 | 63 | // Setting help texts: |
@@ -65,7 +65,7 @@ discard block |
||
65 | 65 | $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
66 | 66 | $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line."; |
67 | 67 | $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n"; |
68 | - $this->cli_help['examples'].= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
68 | + $this->cli_help['examples'] .= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into queue, 4 every minute.\n"; |
|
69 | 69 | $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
70 | 70 | } |
71 | 71 | } |
@@ -1,5 +1,7 @@ |
||
1 | 1 | <?php |
2 | -if (!defined('TYPO3_cliMode')) die('You cannot run this script directly!'); |
|
2 | +if (!defined('TYPO3_cliMode')) { |
|
3 | + die('You cannot run this script directly!'); |
|
4 | +} |
|
3 | 5 | |
4 | 6 | $crawlerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('tx_crawler_lib'); |
5 | 7 | $crawlerObj->CLI_main_flush($_SERVER["argv"]); |
@@ -5,20 +5,20 @@ discard block |
||
5 | 5 | * Retrieve path (taken from cli_dispatch.phpsh) |
6 | 6 | */ |
7 | 7 | |
8 | - // Get path to this script |
|
8 | + // Get path to this script |
|
9 | 9 | $tempPathThisScript = isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : (isset($_ENV['_']) ? $_ENV['_'] : $_SERVER['_']); |
10 | 10 | |
11 | - // Resolve path |
|
11 | + // Resolve path |
|
12 | 12 | if (!isAbsPath($tempPathThisScript)) { |
13 | - $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
|
14 | - if ($workingDirectory) { |
|
15 | - $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
16 | - if (!@is_file($tempPathThisScript)) { |
|
17 | - die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
18 | - } |
|
19 | - } else { |
|
20 | - die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
21 | - } |
|
13 | + $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
|
14 | + if ($workingDirectory) { |
|
15 | + $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
16 | + if (!@is_file($tempPathThisScript)) { |
|
17 | + die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
18 | + } |
|
19 | + } else { |
|
20 | + die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
21 | + } |
|
22 | 22 | } |
23 | 23 | |
24 | 24 | $typo3Root = preg_replace('#typo3conf/ext/crawler/cli/bootstrap.php$#', '', $tempPathThisScript); |
@@ -30,33 +30,33 @@ discard block |
||
30 | 30 | */ |
31 | 31 | $additionalHeaders = unserialize(base64_decode($_SERVER['argv'][3])); |
32 | 32 | if (is_array($additionalHeaders)) { |
33 | - foreach ($additionalHeaders as $additionalHeader) { |
|
34 | - if (strpos($additionalHeader, ':') !== FALSE) { |
|
35 | - list($key, $value) = explode(':', $additionalHeader, 2); |
|
36 | - $key = str_replace('-', '_', strtoupper(trim($key))); |
|
37 | - if ($key != 'HOST') { |
|
38 | - $_SERVER['HTTP_' . $key] = $value; |
|
39 | - } |
|
40 | - } |
|
41 | - } |
|
33 | + foreach ($additionalHeaders as $additionalHeader) { |
|
34 | + if (strpos($additionalHeader, ':') !== FALSE) { |
|
35 | + list($key, $value) = explode(':', $additionalHeader, 2); |
|
36 | + $key = str_replace('-', '_', strtoupper(trim($key))); |
|
37 | + if ($key != 'HOST') { |
|
38 | + $_SERVER['HTTP_' . $key] = $value; |
|
39 | + } |
|
40 | + } |
|
41 | + } |
|
42 | 42 | } |
43 | 43 | |
44 | 44 | |
45 | - // put parsed query parts into $_GET array |
|
45 | + // put parsed query parts into $_GET array |
|
46 | 46 | $urlParts = parse_url($_SERVER['argv'][2]); |
47 | - // Populating $_GET |
|
47 | + // Populating $_GET |
|
48 | 48 | parse_str($urlParts['query'], $_GET); |
49 | - // Populating $_REQUEST |
|
49 | + // Populating $_REQUEST |
|
50 | 50 | parse_str($urlParts['query'], $_REQUEST); |
51 | - // Populating $_POST |
|
51 | + // Populating $_POST |
|
52 | 52 | $_POST = array(); |
53 | - // Populating $_COOKIE |
|
53 | + // Populating $_COOKIE |
|
54 | 54 | $_COOKIE = array(); |
55 | 55 | |
56 | - // Get the TYPO3_SITE_PATH of the website frontend: |
|
56 | + // Get the TYPO3_SITE_PATH of the website frontend: |
|
57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; |
58 | 58 | |
59 | - // faking the environment |
|
59 | + // faking the environment |
|
60 | 60 | $_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); |
61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; |
62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; |
@@ -66,15 +66,15 @@ discard block |
||
66 | 66 | $_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); |
67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; |
68 | 68 | |
69 | - // Define a port if used in the URL: |
|
69 | + // Define a port if used in the URL: |
|
70 | 70 | if (isset($urlParts['port'])) { |
71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
72 | - $_SERVER['SERVER_PORT'] = $urlParts['port']; |
|
71 | + $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
72 | + $_SERVER['SERVER_PORT'] = $urlParts['port']; |
|
73 | 73 | } |
74 | 74 | |
75 | - // Define HTTPS disposal: |
|
75 | + // Define HTTPS disposal: |
|
76 | 76 | if ($urlParts['scheme'] === 'https') { |
77 | - $_SERVER['HTTPS'] = 'on'; |
|
77 | + $_SERVER['HTTPS'] = 'on'; |
|
78 | 78 | } |
79 | 79 | |
80 | 80 | chdir($typo3Root); |
@@ -88,11 +88,11 @@ discard block |
||
88 | 88 | * @return boolean |
89 | 89 | */ |
90 | 90 | function isAbsPath($path) { |
91 | - // on Windows also a path starting with a drive letter is absolute: X:/ |
|
92 | - if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { |
|
93 | - return TRUE; |
|
94 | - } |
|
91 | + // on Windows also a path starting with a drive letter is absolute: X:/ |
|
92 | + if (stristr(PHP_OS, 'win') && substr($path, 1, 2) === ':/') { |
|
93 | + return TRUE; |
|
94 | + } |
|
95 | 95 | |
96 | - // path starting with a / is always absolute, on every system |
|
97 | - return (substr($path, 0, 1) === '/'); |
|
96 | + // path starting with a / is always absolute, on every system |
|
97 | + return (substr($path, 0, 1) === '/'); |
|
98 | 98 | } |
@@ -12,12 +12,12 @@ discard block |
||
12 | 12 | if (!isAbsPath($tempPathThisScript)) { |
13 | 13 | $workingDirectory = $_SERVER['PWD'] ? $_SERVER['PWD'] : getcwd(); |
14 | 14 | if ($workingDirectory) { |
15 | - $tempPathThisScript = $workingDirectory . '/' . preg_replace('/\.\//', '', $tempPathThisScript); |
|
15 | + $tempPathThisScript = $workingDirectory.'/'.preg_replace('/\.\//', '', $tempPathThisScript); |
|
16 | 16 | if (!@is_file($tempPathThisScript)) { |
17 | - die('Relative path found, but an error occured during resolving of the absolute path: ' . $tempPathThisScript . PHP_EOL); |
|
17 | + die('Relative path found, but an error occured during resolving of the absolute path: '.$tempPathThisScript.PHP_EOL); |
|
18 | 18 | } |
19 | 19 | } else { |
20 | - die('Relative path found, but resolving absolute path is not supported on this platform.' . PHP_EOL); |
|
20 | + die('Relative path found, but resolving absolute path is not supported on this platform.'.PHP_EOL); |
|
21 | 21 | } |
22 | 22 | } |
23 | 23 | |
@@ -35,7 +35,7 @@ discard block |
||
35 | 35 | list($key, $value) = explode(':', $additionalHeader, 2); |
36 | 36 | $key = str_replace('-', '_', strtoupper(trim($key))); |
37 | 37 | if ($key != 'HOST') { |
38 | - $_SERVER['HTTP_' . $key] = $value; |
|
38 | + $_SERVER['HTTP_'.$key] = $value; |
|
39 | 39 | } |
40 | 40 | } |
41 | 41 | } |
@@ -57,18 +57,18 @@ discard block |
||
57 | 57 | $typo3SitePath = $_SERVER['argv'][1]; |
58 | 58 | |
59 | 59 | // faking the environment |
60 | -$_SERVER['DOCUMENT_ROOT'] = preg_replace('#' . preg_quote($typo3SitePath, '#') . '$#', '', $typo3Root); |
|
60 | +$_SERVER['DOCUMENT_ROOT'] = preg_replace('#'.preg_quote($typo3SitePath, '#').'$#', '', $typo3Root); |
|
61 | 61 | $_SERVER['HTTP_USER_AGENT'] = 'CLI Mode'; |
62 | 62 | $_SERVER['HTTP_HOST'] = $_SERVER['SERVER_NAME'] = $urlParts['host']; |
63 | -$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath . 'index.php'; |
|
64 | -$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root . 'index.php'; |
|
63 | +$_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'] = $typo3SitePath.'index.php'; |
|
64 | +$_SERVER['SCRIPT_FILENAME'] = $_SERVER['PATH_TRANSLATED'] = $typo3Root.'index.php'; |
|
65 | 65 | $_SERVER['QUERY_STRING'] = (isset($urlParts['query']) ? $urlParts['query'] : ''); |
66 | -$_SERVER['REQUEST_URI'] = $urlParts['path'] . (isset($urlParts['query']) ? '?' . $urlParts['query'] : ''); |
|
66 | +$_SERVER['REQUEST_URI'] = $urlParts['path'].(isset($urlParts['query']) ? '?'.$urlParts['query'] : ''); |
|
67 | 67 | $_SERVER['REQUEST_METHOD'] = 'GET'; |
68 | 68 | |
69 | 69 | // Define a port if used in the URL: |
70 | 70 | if (isset($urlParts['port'])) { |
71 | - $_SERVER['HTTP_HOST'] .= ':' . $urlParts['port']; |
|
71 | + $_SERVER['HTTP_HOST'] .= ':'.$urlParts['port']; |
|
72 | 72 | $_SERVER['SERVER_PORT'] = $urlParts['port']; |
73 | 73 | } |
74 | 74 | |
@@ -78,7 +78,7 @@ discard block |
||
78 | 78 | } |
79 | 79 | |
80 | 80 | chdir($typo3Root); |
81 | -include($typo3Root . '/index.php'); |
|
81 | +include($typo3Root.'/index.php'); |
|
82 | 82 | |
83 | 83 | |
84 | 84 | /** |
@@ -37,30 +37,30 @@ |
||
37 | 37 | */ |
38 | 38 | class tx_crawler_cli_flush extends \TYPO3\CMS\Core\Controller\CommandLineController { |
39 | 39 | |
40 | - /** |
|
41 | - * Constructor |
|
42 | - * |
|
43 | - * @return void |
|
44 | - */ |
|
45 | - function __construct() { |
|
46 | - parent::__construct(); |
|
40 | + /** |
|
41 | + * Constructor |
|
42 | + * |
|
43 | + * @return void |
|
44 | + */ |
|
45 | + function __construct() { |
|
46 | + parent::__construct(); |
|
47 | 47 | |
48 | - // Adding options to help archive: |
|
49 | - $this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); |
|
50 | - # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
48 | + // Adding options to help archive: |
|
49 | + $this->cli_options[] = array('-o mode', 'Output mode: "finished", "all", "pending"', "Specifies the type queue entries which is flushed in the process."); |
|
50 | + # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors"); |
|
51 | 51 | |
52 | - // Setting help texts: |
|
53 | - $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; |
|
54 | - $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
55 | - $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; |
|
56 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; |
|
57 | - $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; |
|
58 | - $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
59 | - } |
|
52 | + // Setting help texts: |
|
53 | + $this->cli_help['name'] = 'crawler CLI interface -- Cleaning up the queue.'; |
|
54 | + $this->cli_help['synopsis'] = 'page_id ###OPTIONS###'; |
|
55 | + $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It will remove queue entires and perform a cleanup."; |
|
56 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 5 -o=finished\nWill remove all finished queue-entries in the sub-branch of page 5\n"; |
|
57 | + $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_flush 0 -o=all\nWill remove all queue-entries for every page\n"; |
|
58 | + $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009'; |
|
59 | + } |
|
60 | 60 | } |
61 | 61 | |
62 | 62 | if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']) { |
63 | - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); |
|
63 | + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_flush.php']); |
|
64 | 64 | } |
65 | 65 | |
66 | 66 | ?> |
@@ -24,43 +24,43 @@ |
||
24 | 24 | |
25 | 25 | abstract class tx_crawler_domain_lib_abstract_dbobject { |
26 | 26 | |
27 | - /** |
|
28 | - * @var array |
|
29 | - */ |
|
30 | - protected $row; |
|
27 | + /** |
|
28 | + * @var array |
|
29 | + */ |
|
30 | + protected $row; |
|
31 | 31 | |
32 | - /** |
|
33 | - * @var string table name |
|
34 | - */ |
|
35 | - protected static $tableName; |
|
32 | + /** |
|
33 | + * @var string table name |
|
34 | + */ |
|
35 | + protected static $tableName; |
|
36 | 36 | |
37 | - /** |
|
38 | - * Constructor |
|
39 | - * |
|
40 | - * @param array $row optional array with propertys |
|
41 | - */ |
|
42 | - public function __construct($row = array()) { |
|
43 | - $this->row = $row; |
|
44 | - } |
|
37 | + /** |
|
38 | + * Constructor |
|
39 | + * |
|
40 | + * @param array $row optional array with propertys |
|
41 | + */ |
|
42 | + public function __construct($row = array()) { |
|
43 | + $this->row = $row; |
|
44 | + } |
|
45 | 45 | |
46 | - /** |
|
47 | - * Get table name |
|
48 | - * |
|
49 | - * @param void |
|
50 | - * @return string table name |
|
51 | - */ |
|
52 | - public static function getTableName(){ |
|
53 | - return self::$tableName; |
|
54 | - } |
|
46 | + /** |
|
47 | + * Get table name |
|
48 | + * |
|
49 | + * @param void |
|
50 | + * @return string table name |
|
51 | + */ |
|
52 | + public static function getTableName(){ |
|
53 | + return self::$tableName; |
|
54 | + } |
|
55 | 55 | |
56 | - /** |
|
57 | - * Returns the propertys of the object as array |
|
58 | - * |
|
59 | - * @return array |
|
60 | - */ |
|
61 | - public function getRow() { |
|
62 | - return $this->row; |
|
63 | - } |
|
56 | + /** |
|
57 | + * Returns the propertys of the object as array |
|
58 | + * |
|
59 | + * @return array |
|
60 | + */ |
|
61 | + public function getRow() { |
|
62 | + return $this->row; |
|
63 | + } |
|
64 | 64 | |
65 | 65 | |
66 | 66 | } |
@@ -49,7 +49,7 @@ |
||
49 | 49 | * @param void |
50 | 50 | * @return string table name |
51 | 51 | */ |
52 | - public static function getTableName(){ |
|
52 | + public static function getTableName() { |
|
53 | 53 | return self::$tableName; |
54 | 54 | } |
55 | 55 |
@@ -24,19 +24,19 @@ |
||
24 | 24 | |
25 | 25 | class tx_crawler_domain_queue_entry extends tx_crawler_domain_lib_abstract_dbobject { |
26 | 26 | |
27 | - /** |
|
28 | - * @var string table name |
|
29 | - */ |
|
30 | - protected static $tableName = 'tx_crawler_queue'; |
|
27 | + /** |
|
28 | + * @var string table name |
|
29 | + */ |
|
30 | + protected static $tableName = 'tx_crawler_queue'; |
|
31 | 31 | |
32 | - /** |
|
33 | - * Returns the execution time of the record as int value |
|
34 | - * |
|
35 | - * @return int |
|
36 | - */ |
|
37 | - public function getExecutionTime(){ |
|
38 | - return $this->row['exec_time']; |
|
39 | - } |
|
32 | + /** |
|
33 | + * Returns the execution time of the record as int value |
|
34 | + * |
|
35 | + * @return int |
|
36 | + */ |
|
37 | + public function getExecutionTime(){ |
|
38 | + return $this->row['exec_time']; |
|
39 | + } |
|
40 | 40 | |
41 | 41 | } |
42 | 42 |
@@ -34,7 +34,7 @@ |
||
34 | 34 | * |
35 | 35 | * @return int |
36 | 36 | */ |
37 | - public function getExecutionTime(){ |
|
37 | + public function getExecutionTime() { |
|
38 | 38 | return $this->row['exec_time']; |
39 | 39 | } |
40 | 40 |
@@ -69,7 +69,7 @@ |
||
69 | 69 | { |
70 | 70 | $db = $this->getDB(); |
71 | 71 | $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcess_id(), $this->tableName) . |
72 | - ' AND exec_time > 0 '; |
|
72 | + ' AND exec_time > 0 '; |
|
73 | 73 | $limit = 1; |
74 | 74 | $groupby = ''; |
75 | 75 |
@@ -68,7 +68,7 @@ discard block |
||
68 | 68 | protected function getFirstOrLastObjectByProcess($process, $orderby) |
69 | 69 | { |
70 | 70 | $db = $this->getDB(); |
71 | - $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcess_id(), $this->tableName) . |
|
71 | + $where = 'process_id_completed='.$db->fullQuoteStr($process->getProcess_id(), $this->tableName). |
|
72 | 72 | ' AND exec_time > 0 '; |
73 | 73 | $limit = 1; |
74 | 74 | $groupby = ''; |
@@ -93,7 +93,7 @@ discard block |
||
93 | 93 | */ |
94 | 94 | public function countExecutedItemsByProcess($process) |
95 | 95 | { |
96 | - return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = ' . $this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
96 | + return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
97 | 97 | $this->tableName)); |
98 | 98 | } |
99 | 99 | |
@@ -106,7 +106,7 @@ discard block |
||
106 | 106 | */ |
107 | 107 | public function countNonExecutedItemsByProcess($process) |
108 | 108 | { |
109 | - return $this->countItemsByWhereClause('exec_time = 0 AND process_id = ' . $this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
109 | + return $this->countItemsByWhereClause('exec_time = 0 AND process_id = '.$this->getDB()->fullQuoteStr($process->getProcess_id(), |
|
110 | 110 | $this->tableName)); |
111 | 111 | } |
112 | 112 | |
@@ -120,7 +120,7 @@ discard block |
||
120 | 120 | */ |
121 | 121 | public function countAllPendingItems() |
122 | 122 | { |
123 | - return $this->countItemsByWhereClause('exec_time = 0 AND scheduled < ' . time()); |
|
123 | + return $this->countItemsByWhereClause('exec_time = 0 AND scheduled < '.time()); |
|
124 | 124 | } |
125 | 125 | |
126 | 126 | /** |
@@ -133,7 +133,7 @@ discard block |
||
133 | 133 | */ |
134 | 134 | public function countAllAssignedPendingItems() |
135 | 135 | { |
136 | - return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < " . time() . " AND process_id != ''"); |
|
136 | + return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < ".time()." AND process_id != ''"); |
|
137 | 137 | } |
138 | 138 | |
139 | 139 | /** |
@@ -146,7 +146,7 @@ discard block |
||
146 | 146 | */ |
147 | 147 | public function countAllUnassignedPendingItems() |
148 | 148 | { |
149 | - return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < " . time() . " AND process_id = ''"); |
|
149 | + return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < ".time()." AND process_id = ''"); |
|
150 | 150 | } |
151 | 151 | |
152 | 152 | /** |
@@ -176,7 +176,7 @@ discard block |
||
176 | 176 | $res = $db->exec_SELECTquery( |
177 | 177 | "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed", |
178 | 178 | $this->tableName, |
179 | - 'exec_time = 0 AND scheduled < ' . time(), |
|
179 | + 'exec_time = 0 AND scheduled < '.time(), |
|
180 | 180 | 'configuration' |
181 | 181 | ); |
182 | 182 | $rows = array(); |
@@ -200,7 +200,7 @@ discard block |
||
200 | 200 | $res = $db->exec_SELECTquery( |
201 | 201 | 'set_id', |
202 | 202 | $this->tableName, |
203 | - 'exec_time = 0 AND scheduled < ' . time(), |
|
203 | + 'exec_time = 0 AND scheduled < '.time(), |
|
204 | 204 | 'set_id' |
205 | 205 | ); |
206 | 206 | $setIds = array(); |
@@ -226,7 +226,7 @@ discard block |
||
226 | 226 | $res = $db->exec_SELECTquery( |
227 | 227 | 'configuration, count(*) as c', |
228 | 228 | $this->tableName, |
229 | - 'set_id in (' . implode(',', $setIds) . ') AND scheduled < ' . time(), |
|
229 | + 'set_id in ('.implode(',', $setIds).') AND scheduled < '.time(), |
|
230 | 230 | 'configuration' |
231 | 231 | ); |
232 | 232 | while ($row = $db->sql_fetch_assoc($res)) { |
@@ -306,7 +306,7 @@ discard block |
||
306 | 306 | $res = $db->exec_SELECTquery( |
307 | 307 | 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount', |
308 | 308 | $this->tableName, |
309 | - 'exec_time != 0 and exec_time >= ' . intval($start) . ' and exec_time <= ' . intval($end), |
|
309 | + 'exec_time != 0 and exec_time >= '.intval($start).' and exec_time <= '.intval($end), |
|
310 | 310 | 'process_id_completed' |
311 | 311 | ); |
312 | 312 |
@@ -24,90 +24,90 @@ |
||
24 | 24 | |
25 | 25 | class tx_crawler_domain_reason extends tx_crawler_domain_lib_abstract_dbobject { |
26 | 26 | |
27 | - protected static $tableName = 'tx_crawler_reason'; |
|
27 | + protected static $tableName = 'tx_crawler_reason'; |
|
28 | 28 | |
29 | - /** |
|
30 | - * THE CONSTANTS REPRESENT THE KIND OF THE REASON |
|
31 | - * |
|
32 | - * Convention for own states: <extensionkey>_<reason> |
|
33 | - */ |
|
34 | - const REASON_DEFAULT = 'crawler_default_reason'; |
|
35 | - const REASON_GUI_SUBMIT = 'crawler_gui_submit_reason'; |
|
36 | - const REASON_CLI_SUBMIT = 'crawler_cli_submit_reason'; |
|
29 | + /** |
|
30 | + * THE CONSTANTS REPRESENT THE KIND OF THE REASON |
|
31 | + * |
|
32 | + * Convention for own states: <extensionkey>_<reason> |
|
33 | + */ |
|
34 | + const REASON_DEFAULT = 'crawler_default_reason'; |
|
35 | + const REASON_GUI_SUBMIT = 'crawler_gui_submit_reason'; |
|
36 | + const REASON_CLI_SUBMIT = 'crawler_cli_submit_reason'; |
|
37 | 37 | |
38 | - /** |
|
39 | - * Set uid |
|
40 | - * |
|
41 | - * @param int uid |
|
42 | - * @return void |
|
43 | - */ |
|
44 | - public function setUid($uid) { |
|
45 | - $this->row['uid'] = $uid; |
|
46 | - } |
|
38 | + /** |
|
39 | + * Set uid |
|
40 | + * |
|
41 | + * @param int uid |
|
42 | + * @return void |
|
43 | + */ |
|
44 | + public function setUid($uid) { |
|
45 | + $this->row['uid'] = $uid; |
|
46 | + } |
|
47 | 47 | |
48 | - /** |
|
49 | - * Method to set a timestamp for the creation time of this record |
|
50 | - * |
|
51 | - * @param int $time |
|
52 | - */ |
|
53 | - public function setCreationDate($time) { |
|
54 | - $this->row['crdate'] = $time; |
|
55 | - } |
|
48 | + /** |
|
49 | + * Method to set a timestamp for the creation time of this record |
|
50 | + * |
|
51 | + * @param int $time |
|
52 | + */ |
|
53 | + public function setCreationDate($time) { |
|
54 | + $this->row['crdate'] = $time; |
|
55 | + } |
|
56 | 56 | |
57 | - /** |
|
58 | - * This method can be used to set a user id of the user who has created this reason entry |
|
59 | - * |
|
60 | - * @param int $user_id |
|
61 | - */ |
|
62 | - public function setBackendUserId($user_id) { |
|
63 | - $this->row['cruser_id'] = $user_id; |
|
64 | - } |
|
57 | + /** |
|
58 | + * This method can be used to set a user id of the user who has created this reason entry |
|
59 | + * |
|
60 | + * @param int $user_id |
|
61 | + */ |
|
62 | + public function setBackendUserId($user_id) { |
|
63 | + $this->row['cruser_id'] = $user_id; |
|
64 | + } |
|
65 | 65 | |
66 | - /** |
|
67 | - * Method to set the type of the reason for this reason instance (see constances) |
|
68 | - * |
|
69 | - * @param string $string |
|
70 | - */ |
|
71 | - public function setReason($string) { |
|
72 | - $this->row['reason'] = $string; |
|
73 | - } |
|
66 | + /** |
|
67 | + * Method to set the type of the reason for this reason instance (see constances) |
|
68 | + * |
|
69 | + * @param string $string |
|
70 | + */ |
|
71 | + public function setReason($string) { |
|
72 | + $this->row['reason'] = $string; |
|
73 | + } |
|
74 | 74 | |
75 | - /** |
|
76 | - * This method returns the attached reason text. |
|
77 | - * @return string |
|
78 | - */ |
|
79 | - public function getReason() { |
|
80 | - return $this->row['reason']; |
|
81 | - } |
|
75 | + /** |
|
76 | + * This method returns the attached reason text. |
|
77 | + * @return string |
|
78 | + */ |
|
79 | + public function getReason() { |
|
80 | + return $this->row['reason']; |
|
81 | + } |
|
82 | 82 | |
83 | - /** |
|
84 | - * This method can be used to assign a detail text to the crawler reason |
|
85 | - * |
|
86 | - * @param string $detail_text |
|
87 | - */ |
|
88 | - public function setDetailText($detail_text) { |
|
89 | - $this->row['detail_text'] = $detail_text; |
|
90 | - } |
|
83 | + /** |
|
84 | + * This method can be used to assign a detail text to the crawler reason |
|
85 | + * |
|
86 | + * @param string $detail_text |
|
87 | + */ |
|
88 | + public function setDetailText($detail_text) { |
|
89 | + $this->row['detail_text'] = $detail_text; |
|
90 | + } |
|
91 | 91 | |
92 | - /** |
|
93 | - * Returns the attachet detail text. |
|
94 | - * |
|
95 | - * @param void |
|
96 | - * @return string |
|
97 | - */ |
|
98 | - public function getDetailText() { |
|
99 | - return $this->row['detail_text']; |
|
100 | - } |
|
92 | + /** |
|
93 | + * Returns the attachet detail text. |
|
94 | + * |
|
95 | + * @param void |
|
96 | + * @return string |
|
97 | + */ |
|
98 | + public function getDetailText() { |
|
99 | + return $this->row['detail_text']; |
|
100 | + } |
|
101 | 101 | |
102 | - /** |
|
103 | - * This method is used to set the uid of the queue entry |
|
104 | - * where the reason is relevant for. |
|
105 | - * |
|
106 | - * @param int $entry_uid |
|
107 | - */ |
|
108 | - public function setQueueEntryUid($entry_uid) { |
|
109 | - $this->row['queue_entry_uid'] = $entry_uid; |
|
110 | - } |
|
102 | + /** |
|
103 | + * This method is used to set the uid of the queue entry |
|
104 | + * where the reason is relevant for. |
|
105 | + * |
|
106 | + * @param int $entry_uid |
|
107 | + */ |
|
108 | + public function setQueueEntryUid($entry_uid) { |
|
109 | + $this->row['queue_entry_uid'] = $entry_uid; |
|
110 | + } |
|
111 | 111 | |
112 | 112 | } |
113 | 113 |