|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/*************************************************************** |
|
4
|
|
|
* Copyright notice |
|
5
|
|
|
* |
|
6
|
|
|
* (c) 2005 Kasper Skaarhoj ([email protected]) |
|
7
|
|
|
* All rights reserved |
|
8
|
|
|
* |
|
9
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
|
10
|
|
|
* free software; you can redistribute it and/or modify |
|
11
|
|
|
* it under the terms of the GNU General Public License as published by |
|
12
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
|
13
|
|
|
* (at your option) any later version. |
|
14
|
|
|
* |
|
15
|
|
|
* The GNU General Public License can be found at |
|
16
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
|
17
|
|
|
* |
|
18
|
|
|
* This script is distributed in the hope that it will be useful, |
|
19
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
20
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
21
|
|
|
* GNU General Public License for more details. |
|
22
|
|
|
* |
|
23
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
|
24
|
|
|
***************************************************************/ |
|
25
|
|
|
|
|
26
|
|
|
/** |
|
27
|
|
|
* Class tx_crawler_cli_im |
|
28
|
|
|
*/ |
|
29
|
|
|
class tx_crawler_cli_im extends \TYPO3\CMS\Core\Controller\CommandLineController
{
    /**
     * Sets up the command: runs the parent constructor, appends the
     * command-specific entries to $this->cli_options and fills in the
     * help texts in $this->cli_help.
     */
    public function __construct()
    {
        parent::__construct();

        // The former "-proc" option (list of processing instructions) was removed
        // on purpose: jobs are now selected through configuration keys ("-conf"),
        // and the processing instructions are configured on those records.
        $commandOptions = [
            [
                '-d depth',
                'Tree depth, 0-99',
                "How many levels under the 'page_id' to include.",
            ],
            [
                '-o mode',
                'Output mode: "url", "exec", "queue"',
                "Specifies output modes\nurl : Will list URLs which wget could use as input.\nqueue: Will put entries in queue table.\nexec: Will execute all entries right away!",
            ],
            [
                '-n number',
                'Number of items per minute.',
                'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"',
            ],
            [
                '-conf configurationkeys',
                'List of Configuration Keys',
                'A commaseperated list of crawler configurations',
            ],
        ];
        foreach ($commandOptions as $optionDefinition) {
            $this->cli_options[] = $optionDefinition;
        }

        // Each example is a command line followed by its explanation; every
        // line, including the last one, is terminated by a newline.
        $exampleLines = [
            '/.../cli_dispatch.phpsh crawler_im 7 -d=2 -conf=<configurationKey> -o=exec',
            'Will re-cache pages from page 7 and two levels down, executed immediately.',
            '/.../cli_dispatch.phpsh crawler_im 7 -d=0 -conf=<configurationKey> -n=4 -o=queue',
            'Will put entries for re-caching pages from page 7 into queue, 4 every minute.',
        ];

        $helpTexts = [
            'name' => 'crawler CLI interface -- Submitting URLs to be crawled via CLI interface.',
            'synopsis' => 'page_id ###OPTIONS###',
            'description' => "Works as a CLI interface to some functionality from the Web > Info > Site Crawler module; It can put entries in the queue from command line options, return the list of URLs and even execute all entries right away without having to queue them up - this can be useful for immediate re-cache, re-indexing or static publishing from command line.",
            'examples' => implode("\n", $exampleLines) . "\n",
            'author' => 'Kasper Skaarhoj, Daniel Poetzinger, Fabrizio Branca, Tolleiv Nietsch, Timo Schmidt - AOE media 2009',
        ];
        foreach ($helpTexts as $section => $text) {
            $this->cli_help[$section] = $text;
        }
    }
}
|
65
|
|
|
|
|
66
|
|
|
// Legacy XCLASS hook: if an extending class file has been registered for this
// script in TYPO3_CONF_VARS, load it once.
// The configuration is read through $GLOBALS so the lookup also works when this
// file is included from inside a function scope, and !empty() avoids a notice
// when no XCLASS entry is registered for this file.
if (
    defined('TYPO3_MODE')
    && !empty($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php'])
) {
    include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/crawler/cli/class.tx_crawler_cli_im.php']);
}
|
69
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list instead.
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths