#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on __DATE__
# Project: __PROJECT_NAME__
5 | |||
6 | from pyspider.libs.base_handler import * |
||
7 | |||
8 | |||
class Handler(BaseHandler):
    """Crawl script: fetch a start page, follow its absolute links, and
    emit the URL and title of every linked page.
    """

    # No per-project crawl options are set.
    crawl_config = {}

    @every(minutes=24 * 60)
    def on_start(self):
        """Entry point: queue the start page and hand it to ``index_page``.

        The ``every`` decorator is given 24 * 60 minutes (one day).
        """
        start_url = 'http://127.0.0.1:14887/pyspider/test.html'
        self.crawl(start_url, callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every anchor whose ``href`` starts with ``http``.

        Each matched link is crawled with ``detail_page`` as its callback.
        The ``config`` decorator is given an age of 10 * 24 * 60 * 60
        (ten days expressed in seconds).
        """
        anchors = response.doc('a[href^="http"]')
        for link in anchors.items():
            self.crawl(link.attr.href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Return a result dict holding the page's URL and ``<title>`` text."""
        page_url = response.url
        page_title = response.doc('title').text()
        return {
            "url": page_url,
            "title": page_title,
        }