Completed
Push — master ( 6a66d3...6a25b4 )
by Roy
01:12
created

pyspider.database.local.ProjectDB.load_scripts()   C

Complexity

Conditions 8

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 8
dl 0
loc 16
rs 6.6667
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2015-01-17 12:32:17
7
8
import os
9
import re
10
import six
11
import glob
12
import logging
13
14
from pyspider.database.base.projectdb import ProjectDB as BaseProjectDB
15
16
17
class ProjectDB(BaseProjectDB):
    """ProjectDB loading scripts from local files.

    Every file matched by one of the glob patterns in ``files`` becomes one
    project, named after the file's base name without its extension.
    Projects live in the in-memory ``self.projects`` dict (name -> project
    dict) and are re-synchronised with the filesystem by ``load_scripts``.
    """

    # Per-script settings are taken from comment lines in the script such as
    # "# rate: 2.5" or "# burst: 10": the first number found after the
    # keyword is used.
    rate_re = re.compile(r'^\s*#\s*rate.*?(\d+(\.\d+)?)', re.I | re.M)
    burst_re = re.compile(r'^\s*#\s*burst.*?(\d+(\.\d+)?)', re.I | re.M)

    def __init__(self, files):
        """Remember the glob patterns in ``files`` and load matching scripts."""
        self.files = files
        self.projects = {}
        self.load_scripts()

    def load_scripts(self):
        """Synchronise ``self.projects`` with the script files on disk.

        Files that are new, or whose mtime is newer than the stored
        ``updatetime``, are (re)loaded.  Projects whose name was not seen
        during the scan are removed.  A file that cannot be stat'ed or read
        is logged and skipped so that one bad file does not abort the scan.
        """
        # Names known before the scan; whatever is still in this set
        # afterwards has no matching file any more.
        stale_names = set(self.projects)
        for path in self.files:
            for filename in glob.glob(path):
                name = os.path.splitext(os.path.basename(filename))[0]
                stale_names.discard(name)
                try:
                    updatetime = os.path.getmtime(filename)
                except OSError as e:
                    # Typically the file disappeared between glob() and the
                    # stat call; skip it for this scan instead of crashing.
                    logging.error('loading project script error: %s', e)
                    continue
                known = self.projects.get(name)
                if known is not None and updatetime <= known['updatetime']:
                    # Unchanged since it was last loaded.
                    continue
                project = self._build_project(filename)
                if not project:
                    continue
                self.projects[project['name']] = project

        for name in stale_names:
            del self.projects[name]

    def _build_project(self, filename):
        """Read ``filename`` and return its project dict.

        ``rate`` and ``burst`` come from the script's ``# rate`` / ``# burst``
        comment lines and default to 1 and 3.  Returns ``None`` (after
        logging) when the file cannot be read or stat'ed.
        """
        try:
            with open(filename) as fp:
                script = fp.read()
            updatetime = os.path.getmtime(filename)
        except (IOError, OSError) as e:
            # On Python 2 open()/read() raise IOError, which is not a
            # subclass of OSError there, so both must be listed.
            logging.error('loading project script error: %s', e)
            return None

        m = self.rate_re.search(script)
        rate = float(m.group(1)) if m else 1

        m = self.burst_re.search(script)
        burst = float(m.group(1)) if m else 3

        return {
            'name': os.path.splitext(os.path.basename(filename))[0],
            'group': None,
            'status': 'RUNNING',
            'script': script,
            'comments': None,
            'rate': rate,
            'burst': burst,
            'updatetime': updatetime,
        }

    def get_all(self, fields=None):
        """Yield every loaded project, restricted to ``fields`` if given."""
        for projectname in self.projects:
            yield self.get(projectname, fields)

    def get(self, name, fields=None):
        """Return a copy of project ``name``, or ``None`` if it is unknown.

        When ``fields`` is given (and non-empty) only those keys are
        returned; a requested key the project lacks maps to ``None``.
        """
        if name not in self.projects:
            return None
        project = self.projects[name]
        return dict((f, project.get(f)) for f in fields or project)

    def check_update(self, timestamp, fields=None):
        """Rescan the files, then yield projects updated after ``timestamp``.

        This is a generator, so the rescan happens when iteration starts,
        not when the method is called.
        """
        self.load_scripts()
        for projectname, project in six.iteritems(self.projects):
            if project['updatetime'] > timestamp:
                yield self.get(projectname, fields)
96