Completed
Push — master ( b244b2...c54284 )
by Roy
59s
created

pyspider.database.base.TaskDB.copy()   A

Complexity

Conditions 1

Size

Total Lines 9

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 9
rs 9.6667
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2014-02-08 10:28:48
7
8
# Task schema.
#
# The literal below describes the shape of a task record: each key is a
# field name and each value is the type expected for that field.  It is a
# bare expression that is never bound to a name, so it is evaluated and
# discarded -- it serves as documentation only.
#
# The trailing comments are the original author's notes on when a field is
# written.  NOTE(review): their wording here is a reading of the original
# terse annotations -- confirm against the scheduler before relying on it.
9
{
10
    'task': {
11
        'taskid': str,  # set on a new task, not changed afterwards
12
        'project': str,  # set on a new task, not changed afterwards
13
        'url': str,  # set on a new task, not changed afterwards
14
        'status': int,  # changes over the life of the task
15
        'schedule': {
16
            'priority': int,
17
            'retries': int,
18
            'retried': int,
19
            'exetime': int,
20
            'age': int,
21
            'itag': str,
22
            # 'recrawl': int
23
        },  # set on a new task and on restart
24
        'fetch': {
25
            'method': str,
26
            'headers': dict,
27
            'data': str,
28
            'timeout': int,
29
            'save': dict,
30
        },  # set on a new task and on restart
31
        'process': {
32
            'callback': str,
33
        },  # set on a new task and on restart
34
        'track': {
35
            'fetch': {
36
                'ok': bool,
37
                'time': int,
38
                'status_code': int,
39
                'headers': dict,
40
                'encoding': str,
41
                'content': str,
42
            },
43
            'process': {
44
                'ok': bool,
45
                'time': int,
46
                'follows': int,
47
                'outputs': int,
48
                'logs': str,
49
                'exception': str,
50
            },
51
            'save': object,  # JSON-serializable object saved by the processor
52
        },  # set when the task finishes
53
        'lastcrawltime': int,  # kept between requests
54
        'updatetime': int,  # kept between requests
55
    }
56
}
57
58
59
class TaskDB(object):
    '''
    Abstract interface of a task database.

    Every storage operation defined here raises NotImplementedError, so a
    concrete backend has to override them.  Only the status constants and
    the two status conversion helpers are usable as-is.
    '''

    # task status codes
    ACTIVE = 1
    SUCCESS = 2
    FAILED = 3
    BAD = 4

    # projects in taskdb; a class attribute, so it is shared by every
    # instance that does not rebind it
    projects = set()

    def load_tasks(self, status, project=None, fields=None):
        '''
        load tasks with the given status

        project and fields presumably narrow the result to one project and
        to a subset of task fields -- confirm against a concrete backend.
        '''
        raise NotImplementedError

    def get_task(self, project, taskid, fields=None):
        '''
        get a single task of a project by its taskid

        fields presumably selects which task fields to return -- confirm
        against a concrete backend.
        '''
        raise NotImplementedError

    def status_count(self, project):
        '''
        return a dict

        presumably maps each status to the number of tasks of the project
        in that status -- confirm against a concrete backend.
        '''
        raise NotImplementedError

    def insert(self, project, taskid, obj=None):
        '''
        insert a task into a project

        obj holds the task fields; it defaults to None rather than to a
        shared mutable dict, implementations should treat None as "no
        fields".
        '''
        raise NotImplementedError

    def update(self, project, taskid, obj=None, **kwargs):
        '''
        update a task of a project

        obj holds the task fields; it defaults to None rather than to a
        shared mutable dict, implementations should treat None as "no
        fields".  kwargs presumably carries further fields to update --
        confirm against a concrete backend.
        '''
        raise NotImplementedError

    def drop(self, project):
        '''
        drop a project from the task database
        '''
        raise NotImplementedError

    @staticmethod
    def status_to_string(status):
        '''
        return the name of a status code

        anything that is not ACTIVE, SUCCESS, FAILED or BAD gives 'UNKNOWN'
        '''
        # keyed by the class constants so the table cannot drift from them
        return {
            TaskDB.ACTIVE: 'ACTIVE',
            TaskDB.SUCCESS: 'SUCCESS',
            TaskDB.FAILED: 'FAILED',
            TaskDB.BAD: 'BAD',
        }.get(status, 'UNKNOWN')

    @staticmethod
    def status_to_int(status):
        '''
        return the status code of a status name

        an unrecognized name is mapped to BAD
        '''
        return {
            'ACTIVE': TaskDB.ACTIVE,
            'SUCCESS': TaskDB.SUCCESS,
            'FAILED': TaskDB.FAILED,
            'BAD': TaskDB.BAD,
        }.get(status, TaskDB.BAD)

    def copy(self):
        '''
        database should be able to copy itself to create a new connection

        it's implemented automatically by pyspider.database.connect_database
        if you do not create the database connection via the
        connect_database method, you should implement this
        '''
        raise NotImplementedError
115