Completed
Push — master ( a3ed05...59f3e4 )
by Roy
01:14
created

resizer_js()   A

Complexity

Conditions 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2014-02-23 00:19:06
7
8
9
import sys
10
import time
11
import socket
12
import inspect
13
import datetime
14
import traceback
15
from flask import render_template, request, json
16
from flask.ext import login
17
18
from pyspider.libs import utils, sample_handler, dataurl
19
from pyspider.libs.response import rebuild_response
20
from pyspider.processor.project_module import ProjectManager, ProjectFinder
21
from .app import app
22
23
# Task handed to the debugger page when the request does not name a stored
# task; its ``process.callback`` points at ``on_start``.
default_task = dict(
    taskid='data:,on_start',
    project='',
    url='data:,on_start',
    process=dict(callback='on_start'),
)
# Source text of the sample handler module; ``debug`` uses it as the script
# template for projects that are not stored in projectdb yet.
default_script = inspect.getsource(sample_handler)
32
33
34
@app.route('/debug/<project>', methods=['GET', 'POST'])
def debug(project):
    """Render the debugger page for ``project``.

    The script shown is the one stored in projectdb; when the project does
    not exist yet, the sample handler is used with its ``__DATE__``,
    ``__PROJECT_NAME__`` and ``__START_URL__`` placeholders filled in.

    The task shown is the one named by the ``taskid`` query argument (looked
    up in taskdb), or a fresh copy of ``default_task`` bound to ``project``.

    Returns the rendered ``debug.html`` template, or a ``400`` response when
    projectdb rejects the project name.
    """
    projectdb = app.config['projectdb']
    if not projectdb.verify_project_name(project):
        return 'project name is not allowed!', 400

    info = projectdb.get(project, fields=['name', 'script'])
    if info:
        script = info['script']
    else:
        # Unknown project: start from the sample handler. When no start URL
        # was submitted the ``__START_URL__`` placeholder is left as is.
        script = (default_script
                  .replace('__DATE__', datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
                  .replace('__PROJECT_NAME__', project)
                  .replace('__START_URL__', request.values.get('start-urls') or '__START_URL__'))

    taskid = request.args.get('taskid')
    if taskid:
        taskdb = app.config['taskdb']
        task = taskdb.get_task(
            project, taskid, ['taskid', 'project', 'url', 'fetch', 'process'])
    else:
        # Build a per-request copy instead of writing the project name into
        # the module-level ``default_task``, which is shared by all requests.
        task = dict(default_task, project=project)

    return render_template("debug.html", task=task, script=script, project_name=project)
58
59
60
@app.before_first_request
def enable_projects_import():
    """Append a ``ProjectFinder`` backed by the configured projectdb to ``sys.meta_path``."""
    finder = ProjectFinder(app.config['projectdb'])
    sys.meta_path.append(finder)
63
64
65
def _debug_result(start_time, logs, fetch_result=""):
    """Build the payload of a debug run that produced no task output."""
    return {
        'fetch_result': fetch_result,
        'logs': logs,
        'follows': [],
        'messages': [],
        'result': None,
        'time': time.time() - start_time,
    }


def _json_response(result):
    """Serialize ``result`` into a ``200`` JSON response tuple."""
    # binary data can't be encoded to JSON, so the result is converted to a
    # unicode object before it is sent to the frontend
    return json.dumps(utils.unicode_obj(result)), 200, {'Content-Type': 'application/json'}


def _format_current_exception():
    """Format the exception being handled, passing its traceback through ``utils.hide_me``."""
    exc_type, exc_value, tb = sys.exc_info()
    tb = utils.hide_me(tb, globals())
    return ''.join(traceback.format_exception(exc_type, exc_value, tb))


@app.route('/debug/<project>/run', methods=['POST', ])
def run(project):
    """Run one task against the submitted script and report the outcome.

    Form fields read: ``task`` (JSON encoded task), ``script`` (project
    source) and the optional ``webdav_mode``; when the latter equals
    ``'true'`` the script stored in projectdb is used instead of the
    submitted one.

    The response is always a ``200`` JSON document with the keys
    ``fetch_result``, ``logs``, ``follows``, ``messages``, ``result`` and
    ``time`` (seconds spent in this view). Failures while building the
    module, fetching or processing are reported through ``logs`` as a
    formatted traceback.
    """
    start_time = time.time()
    try:
        task = utils.decode_unicode_obj(json.loads(request.form['task']))
    except Exception:
        return _json_response(_debug_result(start_time, u'task json error'))

    project_info = {
        'name': project,
        'status': 'DEBUG',
        'script': request.form['script'],
    }

    if request.form.get('webdav_mode') == 'true':
        projectdb = app.config['projectdb']
        info = projectdb.get(project, fields=['name', 'script'])
        if not info:
            return _json_response(
                _debug_result(start_time, u' in wevdav mode, cannot load script'))
        project_info['script'] = info['script']

    fetch_result = {}
    try:
        module = ProjectManager.build_module(project_info, {
            'debugger': True,
            'process_time_limit': app.config['process_time_limit'],
        })

        # NOTE: the scheduler normally joins crawl_config into the task when
        # it selects it. That is not mocked here; per the original author's
        # note it is done in BaseHandler.crawl when `is_debugger is True`.

        fetch_result = app.config['fetch'](task)
        response = rebuild_response(fetch_result)

        ret = module['instance'].run_task(module['module'], task, response)
    except Exception:
        # Keep whatever was fetched so far so the frontend can still show it.
        result = _debug_result(start_time, _format_current_exception(), fetch_result)
    else:
        result = {
            'fetch_result': fetch_result,
            'logs': ret.logstr(),
            'follows': ret.follows,
            'messages': ret.messages,
            'result': ret.result,
            'time': time.time() - start_time,
        }
        result['fetch_result']['content'] = response.text
        if response.headers.get('content-type', '').startswith('image'):
            # Images are additionally embedded as a data URL.
            result['fetch_result']['dataurl'] = dataurl.encode(
                response.content, response.headers['content-type'])

    try:
        return _json_response(result)
    except Exception:
        # The result itself could not be serialized: report that instead.
        return _json_response(_debug_result(start_time, _format_current_exception()))
163
164
165
@app.route('/debug/<project>/save', methods=['POST', ])
def save(project):
    """Store the submitted ``script`` for ``project`` and notify the scheduler.

    An existing project gets its script updated, and its status set to
    ``CHECKING`` when it was ``DEBUG`` or ``RUNNING``. An unknown project is
    inserted with status ``TODO`` and the configured ``max_rate`` /
    ``max_burst`` (defaults 1 and 3).

    Returns ``('ok', 200)`` on success, ``('rpc error', 200)`` when the
    scheduler RPC call raises ``socket.error``, a ``400`` response for a
    rejected project name, or ``app.login_response`` when the project's group
    contains ``lock`` and the current user is not active.
    """
    db = app.config['projectdb']
    if not db.verify_project_name(project):
        return 'project name is not allowed!', 400

    new_script = request.form['script']
    existing = db.get(project, fields=['name', 'status', 'group'])

    if existing:
        is_locked = 'lock' in db.split_group(existing.get('group'))
        if is_locked and not login.current_user.is_active():
            return app.login_response

        changes = {'script': new_script}
        if existing.get('status') in ('DEBUG', 'RUNNING'):
            changes['status'] = 'CHECKING'
        db.update(project, changes)
    else:
        db.insert(project, {
            'name': project,
            'script': new_script,
            'status': 'TODO',
            'rate': app.config.get('max_rate', 1),
            'burst': app.config.get('max_burst', 3),
        })

    scheduler = app.config['scheduler_rpc']
    if scheduler is None:
        return 'ok', 200

    try:
        scheduler.update_project()
    except socket.error as err:
        app.logger.warning('connect to scheduler rpc error: %r', err)
        return 'rpc error', 200
    return 'ok', 200
202
203
204
@app.route('/debug/<project>/get')
def get_script(project):
    """Return the ``name`` and ``script`` fields of ``project`` from projectdb as JSON.

    Responds with ``400`` when projectdb rejects the project name.
    """
    db = app.config['projectdb']
    if not db.verify_project_name(project):
        return 'project name is not allowed!', 400

    record = db.get(project, fields=['name', 'script'])
    payload = json.dumps(utils.unicode_obj(record))
    headers = {'Content-Type': 'application/json'}
    return payload, 200, headers
212
213
214
@app.route('/blank.html')
def blank_html():
    """Serve an empty document at ``/blank.html``."""
    empty_body = ""
    return empty_body
217