|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
# -*- encoding: utf-8 -*- |
|
3
|
|
|
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: |
|
4
|
|
|
# Author: Binux<[email protected]> |
|
5
|
|
|
# http://binux.me |
|
6
|
|
|
# Created on 2015-01-17 12:32:17 |
|
7
|
|
|
|
|
8
|
|
|
import os |
|
9
|
|
|
import re |
|
10
|
|
|
import six |
|
11
|
|
|
import glob |
|
12
|
|
|
import logging |
|
13
|
|
|
|
|
14
|
|
|
from pyspider.database.base.projectdb import ProjectDB as BaseProjectDB |
|
15
|
|
|
|
|
16
|
|
|
|
|
17
|
|
|
class ProjectDB(BaseProjectDB):
    """ProjectDB loading scripts from local file.

    ``files`` is an iterable of glob patterns. Every matching file becomes
    one project whose name is the file's basename without its extension.
    Projects are kept in ``self.projects`` (name -> project dict) and are
    refreshed from disk by :meth:`load_scripts`.
    """

    # Both patterns look for a comment line starting with "# rate" / "# burst"
    # (case-insensitive, anywhere in the script) and capture the first number
    # that follows on that line.
    rate_re = re.compile(r'^\s*#\s*rate.*?(\d+(\.\d+)?)', re.I | re.M)
    burst_re = re.compile(r'^\s*#\s*burst.*?(\d+(\.\d+)?)', re.I | re.M)

    def __init__(self, files):
        """Remember the glob patterns and perform the initial scan."""
        self.files = files
        self.projects = {}
        self.load_scripts()

    def load_scripts(self):
        """Synchronise ``self.projects`` with the files currently on disk.

        New files are loaded, files whose mtime is newer than the stored
        ``updatetime`` are reloaded, and projects whose file no longer
        matches any pattern are dropped. A file that cannot be stat'ed or
        read is logged and skipped; a previously loaded version of it, if
        any, is kept.
        """
        # Start from every known project; whatever is still in this set after
        # the scan has no backing file anymore.
        stale = set(self.projects)
        for pattern in self.files:
            for filename in glob.glob(pattern):
                name = os.path.splitext(os.path.basename(filename))[0]
                stale.discard(name)

                try:
                    updatetime = os.path.getmtime(filename)
                except OSError as e:
                    # The file may vanish between glob() and the stat call.
                    logging.error('loading project script error: %s', e)
                    continue

                known = self.projects.get(name)
                if known is not None and updatetime <= known['updatetime']:
                    continue  # unchanged since the last load

                project = self._build_project(filename)
                if project:
                    self.projects[project['name']] = project

        for name in stale:
            del self.projects[name]

    def _build_project(self, filename):
        """Read ``filename`` and return its project dict, or None on failure.

        ``rate`` and ``burst`` are taken from ``# rate ...`` / ``# burst ...``
        comment lines in the script and default to 1 and 3 respectively.
        """
        try:
            # Sample the mtime *before* reading: if the file is rewritten in
            # between, the stored timestamp is older than the file's and the
            # next load_scripts() picks the change up instead of missing it.
            updatetime = os.path.getmtime(filename)
            with open(filename) as fp:
                script = fp.read()
        except (IOError, OSError) as e:
            # On Python 2 open() raises IOError, which is not an OSError there.
            logging.error('loading project script error: %s', e)
            return None

        m = self.rate_re.search(script)
        rate = float(m.group(1)) if m else 1

        m = self.burst_re.search(script)
        burst = float(m.group(1)) if m else 3

        return {
            'name': os.path.splitext(os.path.basename(filename))[0],
            'group': None,
            'status': 'RUNNING',
            'script': script,
            'comments': None,
            'rate': rate,
            'burst': burst,
            'updatetime': updatetime,
        }

    def get_all(self, fields=None):
        """Yield every loaded project, restricted to ``fields`` if given."""
        for projectname in self.projects:
            yield self.get(projectname, fields)

    def get(self, name, fields=None):
        """Return a copy of project ``name``, or None if it is not loaded.

        When ``fields`` is given (and non-empty) only those keys are
        returned; a requested key the project does not have maps to None.
        """
        project = self.projects.get(name)
        if project is None:
            return None
        return {f: project.get(f) for f in fields or project}

    def check_update(self, timestamp, fields=None):
        """Rescan the files and yield projects updated after ``timestamp``.

        This is a generator, so the rescan happens when iteration starts,
        not when the method is called.
        """
        self.load_scripts()
        for projectname, project in six.iteritems(self.projects):
            if project['updatetime'] > timestamp:
                yield self.get(projectname, fields)
|
96
|
|
|
|