# Code-quality report for ProjectManager._update_project() -- grade A
# (last analyzed)
#
# Complexity:   Conditions 2
# Size:         Total Lines 6
# Duplication:  Lines 0, Ratio 0 %
# Importance:   Changes 0
#
# Metric  Value
# cc      2
# dl      0
# loc     6
# rs      9.4285
# c       0
# b       0
# f       0
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2014-02-16 22:24:20
7
8
import os
import sys
import imp
import time
import types
import weakref
import logging
import inspect
import traceback
import linecache

import six

from pyspider.libs import utils
from pyspider.libs.log import SaveLogHandler, LogFormatter
20
logger = logging.getLogger("processor")
21
22
23
class ProjectManager(object):
    """
    load projects from projectdb, update project

    Built projects are cached in ``self.projects`` keyed by project name.
    Each cache entry is a dict with the keys ``loader``, ``module``,
    ``class``, ``instance``, ``exception``, ``exception_log``, ``info`` and
    ``load_time``; for a project that failed to build, the first four are
    None and ``exception`` / ``exception_log`` describe the failure.
    """

    # seconds between two scans of projectdb for changed projects (see get())
    CHECK_PROJECTS_INTERVAL = 5 * 60
    # seconds after which a cached project is rebuilt even if nothing changed
    RELOAD_PROJECT_INTERVAL = 60 * 60

    @staticmethod
    def build_module(project, env=None):
        '''Build project script as module

        project: mapping that must contain 'name' and 'script'; 'status' is
            read to derive the debug flag (defaults to 'DEBUG').
        env: optional mapping copied into the handler's ``__env__``; the
            caller's mapping is never modified.

        Returns the cache-entry dict described on the class.
        Raises AssertionError when 'name' or 'script' is missing, or when the
        script defines no subclass of BaseHandler; anything raised while
        executing the script propagates as well.
        '''
        from pyspider.libs import base_handler
        assert 'name' in project, 'need name of project'
        assert 'script' in project, 'need script of project'

        # fix for old non-package version scripts
        pyspider_path = os.path.join(os.path.dirname(__file__), "..")
        if pyspider_path not in sys.path:
            sys.path.insert(1, pyspider_path)

        # always work on a private copy of env
        env = dict(env) if env is not None else {}
        env['debug'] = project.get('status', 'DEBUG') == 'DEBUG'

        loader = ProjectLoader(project)
        module = loader.load_module(project['name'])

        # logger inject: the script sees a per-project Logger under both the
        # names `logging` and `logger`
        module.log_buffer = []
        module.logging = module.logger = logging.Logger(project['name'])
        if env.get('enable_stdout_capture', True):
            handler = SaveLogHandler(module.log_buffer)
            handler.setFormatter(LogFormatter(color=False))
        else:
            handler = logging.StreamHandler()
            handler.setFormatter(LogFormatter(color=True))
        module.logger.addHandler(handler)

        if '__handler_cls__' not in module.__dict__:
            BaseHandler = module.__dict__.get('BaseHandler', base_handler.BaseHandler)
            # Iterate over a snapshot because the namespace is written to
            # inside the loop. There is no break: when several subclasses
            # exist, the last one encountered wins.
            for each in list(six.itervalues(module.__dict__)):
                if inspect.isclass(each) and each is not BaseHandler \
                        and issubclass(each, BaseHandler):
                    module.__dict__['__handler_cls__'] = each
        _class = module.__dict__.get('__handler_cls__')
        assert _class is not None, "need BaseHandler in project module"

        instance = _class()
        instance.__env__ = env
        instance.project_name = project['name']
        instance.project = project

        return {
            'loader': loader,
            'module': module,
            'class': _class,
            'instance': instance,
            'exception': None,
            'exception_log': '',
            'info': project,
            'load_time': time.time(),
        }

    def __init__(self, projectdb, env):
        '''Remember the project database and the env handed to every build'''
        self.projectdb = projectdb
        self.env = env

        self.projects = {}
        self.last_check_projects = time.time()

    def _need_update(self, project_name, updatetime=None, md5sum=None):
        '''Check if project_name need update

        True when the project is not cached yet, when the given md5sum
        differs from the cached one, when the given updatetime is newer than
        the cached one, or when the cached build is older than
        RELOAD_PROJECT_INTERVAL. False otherwise.
        '''
        if project_name not in self.projects:
            return True

        cached = self.projects[project_name]
        if md5sum and md5sum != cached['info'].get('md5sum'):
            return True
        if updatetime and updatetime > cached['info'].get('updatetime', 0):
            return True
        if time.time() - cached['load_time'] > self.RELOAD_PROJECT_INTERVAL:
            return True
        return False

    def _check_projects(self):
        '''Check projects by last update time

        Only projects that are already cached are reloaded; unknown projects
        are loaded lazily by get().
        '''
        # Take the timestamp before querying: a project updated while this
        # scan is running then still falls inside the window of the next
        # scan instead of being skipped.
        scan_started = time.time()
        # presumably check_update yields projects changed since the given
        # timestamp, restricted to the listed fields -- confirm in projectdb
        for project in self.projectdb.check_update(self.last_check_projects,
                                                   ['name', 'updatetime']):
            if project['name'] not in self.projects:
                continue
            cached_info = self.projects[project['name']]['info']
            if project['updatetime'] > cached_info.get('updatetime', 0):
                self._update_project(project['name'])
        self.last_check_projects = scan_started

    def _update_project(self, project_name):
        '''Update one project from database

        Returns None when projectdb has no such project, otherwise the
        True/False result of _load_project().
        '''
        project = self.projectdb.get(project_name)
        if not project:
            return None
        return self._load_project(project)

    def _load_project(self, project):
        '''Load project into self.projects from project info dict

        Stores the script's md5 in project['md5sum'] (the dict is modified in
        place). Returns True when the build succeeded; on any exception the
        failure is logged, a cache entry carrying the exception and its
        traceback is stored instead, and False is returned.
        '''
        try:
            project['md5sum'] = utils.md5string(project['script'])
            ret = self.build_module(project, self.env)
            self.projects[project['name']] = ret
        except Exception as e:
            logger.exception("load project %s error", project.get('name', None))
            ret = {
                'loader': None,
                'module': None,
                'class': None,
                'instance': None,
                'exception': e,
                'exception_log': traceback.format_exc(),
                'info': project,
                'load_time': time.time(),
            }
            self.projects[project['name']] = ret
            return False
        logger.debug('project: %s updated.', project.get('name', None))
        return True

    def get(self, project_name, updatetime=None, md5sum=None):
        '''get project data object, return None if not exists

        Runs the periodic projectdb scan when CHECK_PROJECTS_INTERVAL has
        elapsed, then (re)loads the requested project if _need_update() says
        so, and finally returns its cache entry.
        '''
        if time.time() - self.last_check_projects > self.CHECK_PROJECTS_INTERVAL:
            self._check_projects()
        if self._need_update(project_name, updatetime, md5sum):
            self._update_project(project_name)
        return self.projects.get(project_name, None)
155
156
157
class ProjectLoader(object):
    '''Loader that turns a project's script into a module object

    project: mapping providing at least 'name' and 'script'.
    mod: optional existing module to execute the script into; when omitted
        a new module is created on the first load_module() call and reused
        on later calls.
    '''

    def __init__(self, project, mod=None):
        self.project = project
        self.name = project['name']
        self.mod = mod

    def load_module(self, fullname):
        '''Execute the project script in the module namespace and return it'''
        if self.mod is None:
            # types.ModuleType is what imp.new_module wraps; the imp module
            # is deprecated and absent from Python 3.12+
            self.mod = mod = types.ModuleType(fullname)
        else:
            mod = self.mod
        mod.__file__ = '<%s>' % self.name
        mod.__loader__ = self
        mod.__project__ = self.project
        mod.__package__ = ''
        code = self.get_code(fullname)
        six.exec_(code, mod.__dict__)
        # presumably drops stale source lines cached for '<name>' so that
        # tracebacks show the freshly loaded script -- TODO confirm
        linecache.clearcache()
        # NOTE(review): the module is registered in sys.modules only on
        # Python 3.3 -- looks like a workaround for that release; confirm
        if sys.version_info[:2] == (3, 3):
            sys.modules[fullname] = mod
        return mod

    def is_package(self, fullname):
        '''A project script is always a plain module, never a package'''
        return False

    def get_code(self, fullname):
        '''Compile the project script, using '<name>' as its file name'''
        return compile(self.get_source(fullname), '<%s>' % self.name, 'exec')

    def get_source(self, fullname):
        '''Return the project script; text is encoded to UTF-8 bytes'''
        script = self.project['script']
        # NOTE(review): presumably encoded so that compile() accepts scripts
        # carrying an encoding declaration on Python 2 -- confirm
        if isinstance(script, six.text_type):
            return script.encode('utf8')
        return script
193
194
195
if six.PY2:
    class ProjectFinder(object):
        '''Meta-path finder and loader for ``projects`` / ``projects.<name>``

        The finder itself acts as the loader of the ``projects`` package;
        ``projects.<name>`` is served by a ProjectLoader built from the
        project record stored in projectdb.
        '''

        def __init__(self, projectdb):
            # only a weak reference is held, so the finder does not keep
            # projectdb alive
            self.get_projectdb = weakref.ref(projectdb)

        @property
        def projectdb(self):
            '''The referenced projectdb, or None once it has been collected'''
            return self.get_projectdb()

        def find_module(self, fullname, path=None):
            '''Return a loader for fullname, or None if it is not ours'''
            if fullname == 'projects':
                return self
            parts = fullname.split('.')
            if len(parts) == 2 and parts[0] == 'projects':
                # dereference once: the local name keeps projectdb alive
                # between the check and the lookup
                projectdb = self.projectdb
                if not projectdb:
                    return None
                info = projectdb.get(parts[1])
                if info:
                    return ProjectLoader(info)
            return None

        def load_module(self, fullname):
            '''Create the empty ``projects`` package module'''
            # types.ModuleType replaces the deprecated imp.new_module
            mod = types.ModuleType(fullname)
            mod.__file__ = '<projects>'
            mod.__loader__ = self
            mod.__path__ = ['<projects>']
            mod.__package__ = 'projects'
            return mod

        def is_package(self, fullname):
            return True
else:
    import importlib.abc
    # find_spec() uses importlib.util; importing importlib.abc alone does not
    # guarantee that this submodule is loaded
    import importlib.util

    class ProjectFinder(importlib.abc.MetaPathFinder):
        '''Meta-path finder for ``projects`` and ``projects.<name>``'''

        def __init__(self, projectdb):
            # only a weak reference is held, so the finder does not keep
            # projectdb alive
            self.get_projectdb = weakref.ref(projectdb)

        @property
        def projectdb(self):
            '''The referenced projectdb, or None once it has been collected'''
            return self.get_projectdb()

        def find_spec(self, fullname, path=None, target=None):
            '''Return a module spec for fullname, or None if it is not ours'''
            loader = self.find_module(fullname, path)
            if loader:
                return importlib.util.spec_from_loader(fullname, loader)
            return None

        def find_module(self, fullname, path=None):
            '''Return a loader for fullname, or None if it is not ours'''
            if fullname == 'projects':
                return ProjectsLoader()
            parts = fullname.split('.')
            if len(parts) == 2 and parts[0] == 'projects':
                # dereference once: the local name keeps projectdb alive
                # between the check and the lookup
                projectdb = self.projectdb
                if not projectdb:
                    return None
                info = projectdb.get(parts[1])
                if info:
                    return ProjectLoader(info)
            return None

    class ProjectsLoader(importlib.abc.InspectLoader):
        '''Loader of the empty ``projects`` package itself'''

        def load_module(self, fullname):
            # types.ModuleType replaces the deprecated imp.new_module
            mod = types.ModuleType(fullname)
            mod.__file__ = '<projects>'
            mod.__loader__ = self
            mod.__path__ = ['<projects>']
            mod.__package__ = 'projects'
            # NOTE(review): registered in sys.modules only on Python 3.3 --
            # looks like a workaround for that release; confirm
            if sys.version_info[:2] == (3, 3):
                sys.modules[fullname] = mod
            return mod

        def module_repr(self, module):
            return '<Module projects>'

        def is_package(self, fullname):
            return True

        def get_source(self, path):
            # the package has no source of its own
            return ''

        def get_code(self, fullname):
            return compile(self.get_source(fullname), '<projects>', 'exec')

    class ProjectLoader(ProjectLoader, importlib.abc.Loader):
        '''Adapter exposing the base ProjectLoader through the spec-based API

        The class deliberately rebinds the module-level name ProjectLoader.
        '''

        def create_module(self, spec):
            # the script is already executed here, by the inherited
            # load_module(); exec_module() therefore has nothing left to do
            return self.load_module(spec.name)

        def exec_module(self, module):
            return module

        def module_repr(self, module):
            return '<Module projects.%s>' % self.name
290