1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- encoding: utf-8 -*- |
3
|
|
|
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: |
4
|
|
|
# Author: Binux<[email protected]> |
5
|
|
|
# http://binux.me |
6
|
|
|
# Created on 2015-01-17 12:32:17 |
7
|
|
|
|
8
|
|
|
import os |
9
|
|
|
import re |
10
|
|
|
import six |
11
|
|
|
import glob |
12
|
|
|
import logging |
13
|
|
|
|
14
|
|
|
from pyspider.database.base.projectdb import ProjectDB as BaseProjectDB |
15
|
|
|
|
16
|
|
|
|
17
|
|
|
class ProjectDB(BaseProjectDB):
    """ProjectDB loading scripts from local file.

    Each file matched by the glob patterns in ``files`` becomes one project,
    named after the file's basename without its extension. Projects are kept
    in the in-memory dict ``self.projects`` and refreshed by
    :meth:`load_scripts`.
    """

    # Optional in-script settings: the first comment line that starts with
    # ``# rate`` / ``# burst`` (case-insensitive) and contains a number
    # supplies the value for the project's 'rate' / 'burst' field.
    rate_re = re.compile(r'^\s*#\s*rate.*?(\d+(\.\d+)?)', re.I | re.M)
    burst_re = re.compile(r'^\s*#\s*burst.*?(\d+(\.\d+)?)', re.I | re.M)

    def __init__(self, files):
        """Remember the glob patterns in ``files`` and load matching scripts.

        :param files: iterable of glob patterns (or plain paths) of scripts.
        """
        self.files = files
        self.projects = {}
        self.load_scripts()

    def load_scripts(self):
        """Synchronise ``self.projects`` with the files currently on disk.

        New files are added, files whose modification time is newer than the
        stored 'updatetime' are rebuilt, and projects whose file is no longer
        matched by any pattern are removed. A file that cannot be stat'ed or
        read is logged and skipped; a previously loaded version of that
        project, if any, is kept.
        """
        # Start by assuming every known project is gone; names that are
        # still matched by a pattern are struck off as they are seen.
        missing = set(self.projects)
        for path in self.files:
            for filename in glob.glob(path):
                name = os.path.splitext(os.path.basename(filename))[0]
                missing.discard(name)

                try:
                    updatetime = os.path.getmtime(filename)
                except OSError as e:
                    # The file may vanish between glob() and the stat call;
                    # one bad file must not abort the whole reload.
                    logging.error('loading project script error: %s', e)
                    continue

                known = self.projects.get(name)
                if known is not None and updatetime <= known['updatetime']:
                    continue  # unchanged since it was last loaded

                project = self._build_project(filename)
                if not project:
                    continue
                self.projects[project['name']] = project

        for name in missing:
            del self.projects[name]

    def _build_project(self, filename):
        """Read ``filename`` and return its project dict.

        :param filename: path of the script file.
        :returns: dict with the keys 'name', 'group', 'status', 'script',
            'comments', 'rate', 'burst' and 'updatetime', or ``None`` when
            the file cannot be read (the error is logged).
        """
        try:
            with open(filename) as fp:
                script = fp.read()
            updatetime = os.path.getmtime(filename)
        except (IOError, OSError, UnicodeDecodeError) as e:
            # On Python 2 open() raises IOError, which is not a subclass of
            # OSError there; on Python 3 an undecodable script raises
            # UnicodeDecodeError. Both are treated as "cannot load".
            logging.error('loading project script error: %s', e)
            return None

        m = self.rate_re.search(script)
        rate = float(m.group(1)) if m else 1

        m = self.burst_re.search(script)
        burst = float(m.group(1)) if m else 3

        return {
            'name': os.path.splitext(os.path.basename(filename))[0],
            'group': None,
            'status': 'RUNNING',
            'script': script,
            'comments': None,
            'rate': rate,
            'burst': burst,
            'updatetime': updatetime,
        }

    def get_all(self, fields=None):
        """Yield every loaded project, restricted to ``fields`` if given."""
        for projectname in self.projects:
            yield self.get(projectname, fields)

    def get(self, name, fields=None):
        """Return a copy of the project ``name``, or ``None`` if unknown.

        :param name: project name.
        :param fields: optional iterable of keys to return; a requested key
            the project does not have is returned with the value ``None``.
            When empty or ``None`` all keys of the project are returned.
        """
        project = self.projects.get(name)
        if project is None:
            return None
        return {f: project.get(f) for f in fields or project}

    def check_update(self, timestamp, fields=None):
        """Reload scripts and yield projects updated after ``timestamp``.

        The reload happens when iteration of the returned generator starts.

        :param timestamp: only projects whose 'updatetime' is strictly
            greater than this value are yielded.
        :param fields: passed through to :meth:`get`.
        """
        self.load_scripts()
        for projectname, project in six.iteritems(self.projects):
            if project['updatetime'] > timestamp:
                yield self.get(projectname, fields)
96
|
|
|
|