<?php

/***************************************************************
 * Copyright notice
 *
 * (c) 2005 Kasper Skaarhoj ([email protected])
 * All rights reserved
 *
 * This script is part of the TYPO3 project. The TYPO3 project is
 * free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * The GNU General Public License can be found at
 * http://www.gnu.org/copyleft/gpl.html.
 *
 * This script is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * This copyright notice MUST APPEAR in all copies of the script!
 ***************************************************************/

/**
 * Class tx_crawler_cli_im
 */
class tx_crawler_cli_im extends \TYPO3\CMS\Core\Controller\CommandLineController
{

    /**
     * Constructor
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();

        // Adding options to the help output:
        /**
         * The "proc" option was removed because it no longer seemed to work. This is fine, since the
         * crawler's configuration handling has changed completely: configuration records were introduced
         * (in addition to page TSconfig) to define "what should be done", so jobs are no longer set up by
         * selecting/filtering processing instructions but by selecting/filtering configuration keys
         * (the processing instructions can be configured there). This is also reflected in the backend
         * modules and gives you a much clearer and more powerful way to work with the crawler extension.
         */
        // $this->cli_options[] = array('-proc listOfProcInstr', 'Comma list of processing instructions. These are the "actions" carried out when crawling and you must specify at least one. Depends on third-party extensions. Examples are "tx_cachemgm_recache" from "cachemgm" extension (will recache pages), "tx_staticpub_publish" from "staticpub" (publishing pages to static files) or "tx_indexedsearch_reindex" from "indexed_search" (indexes pages).');
        // TODO: cleanup here!
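        // Each cli_options entry is [option syntax, short description, detailed description];
        // the parent CommandLineController renders these entries in the CLI help screen.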
        $this->cli_options[] = ['-d depth', 'Tree depth, 0-99', "How many levels under the 'page_id' to include."];
        $this->cli_options[] = ['-o mode', 'Output mode: "url", "exec", "queue"', "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!"];
        $this->cli_options[] = ['-n number', 'Number of items per minute.', 'Specifies how many items are put in the queue per minute. Only valid for output mode "queue".'];
        $this->cli_options[] = ['-conf configurationkeys', 'List of configuration keys', 'A comma-separated list of crawler configurations.'];
        # $this->cli_options[] = array('-v level', 'Verbosity level 0-3', "The value of level can be:\n 0 = all output\n 1 = info and greater (default)\n 2 = warnings and greater\n 3 = errors");

        // Setting help texts:
        $this->cli_help['name'] = 'crawler CLI interface -- Submitting URLs to be crawled via the CLI interface.';
        $this->cli_help['synopsis'] = 'page_id ###OPTIONS###';
        $this->cli_help['description'] = "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; it can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-caching, re-indexing or static publishing from the command line.";
        $this->cli_help['examples'] = "/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec\nWill re-cache pages from page 7 and two levels down, executed immediately.\n";
        $this->cli_help['examples'] .= "/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue\nWill put entries for re-caching pages from page 7 into the queue, 4 every minute.\n";
        $this->cli_help['author'] = 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009';
    }
}
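
// XCLASS inclusion: allows an installation to override/extend this class via
// $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS'] (the legacy TYPO3 extension mechanism).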
if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']) {
    include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']);
}