| 1 | <?php |
||
| 7 | class JobCrawl extends Job |
||
| 8 | {
|
||
| 9 | /** |
||
| 10 | * Maximum number of pages to crawl |
||
| 11 | * @see http://support.diffbot.com/crawlbot/whats-the-difference-between-crawling-and-processing/ |
||
| 12 | * |
||
| 13 | * @return int |
||
| 14 | */ |
||
| 15 | 1 | public function getMaxToCrawl() |
|
| 19 | |||
| 20 | /** |
||
| 21 | * Maximum number of pages to process |
||
| 22 | * @see http://support.diffbot.com/crawlbot/whats-the-difference-between-crawling-and-processing/ |
||
| 23 | * |
||
| 24 | * @return int |
||
| 25 | */ |
||
| 26 | 1 | public function getMaxToProcess() |
|
| 30 | |||
| 31 | /** |
||
| 32 | * Whether the job was set to process only newly found links, |
||
| 33 | * ignoring old but potentially updated ones |
||
| 34 | * |
||
| 35 | * @return bool |
||
| 36 | */ |
||
| 37 | 1 | public function getOnlyProcessIfNew() |
|
| 41 | |||
| 42 | /** |
||
| 43 | * Seed URLs provided to the job. Always returned as an array. |
||
| 44 | * |
||
| 45 | * @return array |
||
| 46 | */ |
||
| 47 | 1 | public function getSeeds() |
|
| 51 | } |