Completed
Push — master ( 39eece...c8d455 )
by Roy
01:11
created

TestWebUI.test_25_debug_post()   A

Complexity

Conditions 1

Size

Total Lines 20

Duplication

Lines 20
Ratio 100 %
Metric Value
cc 1
dl 20
loc 20
rs 9.4285
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2014-11-18 21:03:22
7
8
import os
9
import re
10
import time
11
import json
12
import shutil
13
import unittest2 as unittest
14
15
from pyspider import run
16
from pyspider.libs import utils
17
from pyspider.libs.utils import run_in_thread, ObjectDict
18
19
20
class TestWebUI(unittest.TestCase):
21
22
    @classmethod
    def setUpClass(cls):
        # Class-wide fixture: starts an httpbin server in a subprocess and the
        # pyspider scheduler, fetcher, processor, result worker and webui
        # in-process, all backed by sqlite files under ./data/tests.
        #
        # The first argument is named ``cls`` because this is a classmethod;
        # everything stored on it is shared by every test of the class.

        # Start from an empty data directory.
        shutil.rmtree('./data/tests', ignore_errors=True)
        os.makedirs('./data/tests')

        # NOTE(review): tests.data_test_webpage is never referenced below, so
        # it is presumably imported for its side effects -- confirm.
        import tests.data_test_webpage
        import httpbin
        cls.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
        cls.httpbin = 'http://127.0.0.1:14887'

        # Root CLI context; the component contexts below are derived from it.
        ctx = run.cli.make_context('test', [
            '--taskdb', 'sqlalchemy+sqlite+taskdb:///data/tests/task.db',
            '--projectdb', 'sqlalchemy+sqlite+projectdb:///data/tests/projectdb.db',
            '--resultdb', 'sqlalchemy+sqlite+resultdb:///data/tests/resultdb.db',
        ], None, obj=ObjectDict(testing_mode=True))
        cls.ctx = run.cli.invoke(ctx)

        # Handles of every background thread, joined again in tearDownClass.
        cls.threads = []

        ctx = run.scheduler.make_context('scheduler', [], cls.ctx)
        cls.scheduler = scheduler = run.scheduler.invoke(ctx)
        cls.threads.append(run_in_thread(scheduler.xmlrpc_run))
        cls.threads.append(run_in_thread(scheduler.run))

        ctx = run.fetcher.make_context('fetcher', [
            '--xmlrpc',
            '--xmlrpc-port', '24444',
        ], cls.ctx)
        fetcher = run.fetcher.invoke(ctx)
        cls.threads.append(run_in_thread(fetcher.xmlrpc_run))
        cls.threads.append(run_in_thread(fetcher.run))

        ctx = run.processor.make_context('processor', [], cls.ctx)
        processor = run.processor.invoke(ctx)
        cls.threads.append(run_in_thread(processor.run))

        ctx = run.result_worker.make_context('result_worker', [], cls.ctx)
        result_worker = run.result_worker.invoke(ctx)
        cls.threads.append(run_in_thread(result_worker.run))

        ctx = run.webui.make_context('webui', [
            '--scheduler-rpc', 'http://localhost:23333/'
        ], cls.ctx)
        app = run.webui.invoke(ctx)
        app.debug = True
        # Tests talk to the webui through its test client, not a real socket.
        cls.app = app.test_client()
        cls.rpc = app.config['scheduler_rpc']

        # Presumably gives the background threads time to come up -- confirm.
        time.sleep(1)
71
72 View Code Duplication
    @classmethod
    def tearDownClass(cls):
        # Class-wide teardown: stops every pyspider component, joins the
        # background threads, terminates the httpbin subprocess, checks that
        # the fixture's ports are closed and removes ./data/tests.
        #
        # The try/finally blocks guarantee that the httpbin subprocess is
        # terminated and the data directory removed even when a quit()/join()
        # call or a port assertion raises; the exception still propagates.
        try:
            try:
                for instance in cls.ctx.obj.instances:
                    instance.quit()
                time.sleep(1)

                for thread in cls.threads:
                    thread.join()
            finally:
                cls.httpbin_thread.terminate()
                cls.httpbin_thread.join()

            # None of these ports may still be accepting connections.
            for port in (5000, 23333, 24444, 25555, 14887):
                assert not utils.check_port_open(port), port
        finally:
            shutil.rmtree('./data/tests', ignore_errors=True)
91
92
    def test_10_index_page(self):
        # The root page answers 200 and its body contains "dashboard".
        response = self.app.get('/')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'dashboard', response.data)
96
97
    def test_20_debug(self):
        # GET the debug page of test_project and inspect the two JavaScript
        # variables embedded in it.
        response = self.app.get('/debug/test_project')
        self.assertEqual(response.status_code, 200)
        for marker in (b'debugger',
                       b'var task_content = ',
                       b'var script_content = '):
            self.assertIn(marker, response.data)

        page = utils.text(response.data)

        # The embedded task, once JSON-decoded, mentions the project name.
        task_match = re.search(r'var task_content = (.*);\n', page)
        self.assertIsNotNone(task_match)
        self.assertIn('test_project', json.loads(task_match.group(1)))

        # The embedded script still carries the start-URL placeholder.
        script_match = re.search(r'var script_content = (.*);\n', page)
        self.assertIsNotNone(script_match)
        self.assertIn('__START_URL__', json.loads(script_match.group(1)))
111
112 View Code Duplication
    def test_25_debug_post(self):
        # POST start URLs to the debug page and remember the generated task and
        # script on the class so that later tests can reuse them.
        form = {
            'project-name': 'other_project',
            'start-urls': 'http://127.0.0.1:14887/pyspider/test.html',
            'script-mode': 'script',
        }
        response = self.app.post('/debug/test_project', data=form)
        self.assertEqual(response.status_code, 200)
        for marker in (b'debugger',
                       b'var task_content = ',
                       b'var script_content = '):
            self.assertIn(marker, response.data)

        page = utils.text(response.data)
        cls = self.__class__

        task_match = re.search(r'var task_content = (.*);\n', page)
        self.assertIsNotNone(task_match)
        raw_task = task_match.group(1)
        self.assertIn('test_project', raw_task)
        cls.task_content = json.loads(raw_task)

        script_match = re.search(r'var script_content = (.*);\n', page)
        self.assertIsNotNone(script_match)
        raw_script = script_match.group(1)
        # The posted start URL must have been written into the script.
        self.assertIn('127.0.0.1:14887', raw_script)
        cls.script_content = json.loads(raw_script)
132
133 View Code Duplication
    def test_30_run(self):
        # Run the stored script against the stored task; the first follow-up
        # task is kept on the class as task_content2.
        form = {
            'script': self.script_content,
            'task': self.task_content
        }
        response = self.app.post('/debug/test_project/run', data=form)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertIn(b'follows', response.data)
        follows = result['follows']
        self.assertGreater(len(follows), 0)
        self.__class__.task_content2 = follows[0]
143
144
    def test_32_run_bad_task(self):
        # A task with trailing garbage appended still answers 200, but the
        # result carries logs and no follow-up tasks.
        broken_task = self.task_content+'asdfasdf312!@#'
        response = self.app.post('/debug/test_project/run', data={
            'script': self.script_content,
            'task': broken_task
        })
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertGreater(len(result['logs']), 0)
        self.assertEqual(len(result['follows']), 0)
153
154
    def test_33_run_bad_script(self):
        # A script with trailing garbage appended still answers 200, but the
        # result carries logs and no follow-up tasks.
        broken_script = self.script_content+'adfasfasdf'
        response = self.app.post('/debug/test_project/run', data={
            'script': broken_script,
            'task': self.task_content
        })
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertGreater(len(result['logs']), 0)
        self.assertEqual(len(result['follows']), 0)
163
164
    def test_35_run_http_task(self):
        # Run the follow-up task saved as task_content2, re-encoded as JSON.
        form = {
            'script': self.script_content,
            'task': json.dumps(self.task_content2)
        }
        response = self.app.post('/debug/test_project/run', data=form)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertIn('follows', result)
172
173
    def test_40_save(self):
        # Saving the script answers 200 with "ok" in the body.
        form = {
            'script': self.script_content,
        }
        response = self.app.post('/debug/test_project/save', data=form)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
179
180
    def test_42_get(self):
        # The /get endpoint returns the script exactly as it was stored on the
        # class by the earlier debug-post test.
        response = self.app.get('/debug/test_project/get')
        self.assertEqual(response.status_code, 200)
        payload = json.loads(utils.text(response.data))
        self.assertIn('script', payload)
        self.assertEqual(payload['script'], self.script_content)
186
187 View Code Duplication
    def test_45_run_with_saved_script(self):
        # Run with webdav_mode set and an empty script field; the run must
        # still produce follow-up tasks. The first one replaces task_content2.
        form = {
            'webdav_mode': 'true',
            'script': '',
            'task': self.task_content
        }
        response = self.app.post('/debug/test_project/run', data=form)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertIn(b'follows', response.data)
        follows = result['follows']
        self.assertGreater(len(follows), 0)
        self.__class__.task_content2 = follows[0]
198
199
    def test_50_index_page_list(self):
        # The index page now contains a link whose text is the project name.
        response = self.app.get('/')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'test_project</a>', response.data)
203
204
    def test_52_change_status(self):
        # Setting the project status to RUNNING through /update answers "ok".
        form = {
            'name': 'status',
            'value': 'RUNNING',
            'pk': 'test_project'
        }
        response = self.app.post('/update', data=form)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
212
213
    def test_55_reopen(self):
        # The debug page can still be opened and contains "debugger".
        response = self.app.get('/debug/test_project')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'debugger', response.data)
217
218
    def test_57_resave(self):
        # Saving the same script a second time still answers "ok".
        form = {
            'script': self.script_content,
        }
        response = self.app.post('/debug/test_project/save', data=form)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
224
225
    def test_58_index_page_list(self):
        # After the re-save the index page must show "CHECKING".
        response = self.app.get('/')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'CHECKING', response.data)
229
230
    def test_60_change_rate(self):
        # Setting the project rate to "1/4" through /update answers "ok".
        form = {
            'name': 'rate',
            'value': '1/4',
            'pk': 'test_project'
        }
        response = self.app.post('/update', data=form)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
238
239
    def test_70_change_status(self):
        # Switch the project status to RUNNING once more; expects "ok".
        form = {
            'name': 'status',
            'value': 'RUNNING',
            'pk': 'test_project'
        }
        response = self.app.post('/update', data=form)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
247
248
    def test_80_change_group(self):
        # Change the project's group, then check the new group name appears on
        # the index page.
        form = {
            'name': 'group',
            'value': 'test_binux',
            'pk': 'test_project'
        }
        update_response = self.app.post('/update', data=form)
        self.assertEqual(update_response.status_code, 200)
        self.assertIn(b'ok', update_response.data)

        index_response = self.app.get('/')
        self.assertEqual(index_response.status_code, 200)
        self.assertIn(b'test_binux', index_response.data)
260
261
    def test_90_run(self):
        # After a short pause, POST /run for the project; the JSON answer must
        # carry result == True.
        time.sleep(0.5)
        form = {
            'project': 'test_project',
        }
        response = self.app.post('/run', data=form)
        self.assertEqual(response.status_code, 200)
        answer = json.loads(utils.text(response.data))
        self.assertEqual(answer['result'], True)
268
269
    def test_a10_counter(self):
        # Poll the scheduler RPC once per second, for at most 30 rounds, until
        # more than 5 successes are counted for the project in the 5m window.
        for _ in range(30):
            time.sleep(1)
            project_stats = self.rpc.counter('5m', 'sum').get('test_project', {})
            if project_stats.get('success', 0) > 5:
                break

        # The /counter endpoint must then report more than 3 successes in
        # every time window.
        response = self.app.get('/counter')
        self.assertEqual(response.status_code, 200)
        counters = json.loads(utils.text(response.data))
        self.assertGreater(len(counters), 0)
        for window in ('5m', '1h', '1d', 'all'):
            self.assertGreater(counters['test_project'][window]['success'], 3)
284
285 View Code Duplication
    def test_a20_tasks(self):
        # The task list shows successful tasks and no errors. One task URL and
        # one debug URL are extracted from it and stored on the class for the
        # tests that follow.
        response = self.app.get('/tasks')
        self.assertEqual(response.status_code, 200, response.data)
        self.assertIn(b'SUCCESS</span>', response.data)
        self.assertNotIn(b'>ERROR</span>', response.data)

        cls = self.__class__

        task_link = re.search(r'/task/test_project:[^"]+',
                              utils.text(response.data))
        self.assertIsNotNone(task_link)
        cls.task_url = task_link.group(0)
        self.assertIsNotNone(self.task_url)

        debug_link = re.search(r'/debug/test_project[^"]+',
                               utils.text(response.data))
        self.assertIsNotNone(debug_link)
        cls.debug_task_url = debug_link.group(0)
        self.assertIsNotNone(self.debug_task_url)

        # The same checks hold when the list is filtered by project.
        filtered = self.app.get('/tasks?project=test_project')
        self.assertEqual(filtered.status_code, 200)
        self.assertIn(b'SUCCESS</span>', filtered.data)
        self.assertNotIn(b'>ERROR</span>', filtered.data)
303
304
    def test_a22_active_tasks(self):
        # Every active task carries the basic fields, and at least one of them
        # has a "track" entry with fetch and process details.
        response = self.app.get('/active_tasks')
        tasks = json.loads(utils.text(response.data))
        saw_track = False
        self.assertGreater(len(tasks), 0)
        for task in tasks:
            for field in ('taskid', 'project', 'url', 'updatetime'):
                self.assertIn(field, task)
            if not task.get('track'):
                continue
            saw_track = True
            for stage in ('fetch', 'process'):
                self.assertIn(stage, task['track'])
                for detail in ('ok', 'time'):
                    self.assertIn(detail, task['track'][stage])
        self.assertTrue(saw_track)
321
                    
322
323
    def test_a24_task(self):
        # The task page found by test_a20_tasks shows "lastcrawltime".
        response = self.app.get(self.task_url)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'lastcrawltime', response.data)
327
328
    def test_a26_debug_task(self):
        # The debug URL found by test_a20_tasks can be opened.
        response = self.app.get(self.debug_task_url)
        self.assertEqual(response.status_code, 200)
331
332
    def test_a30_results(self):
        # The results page has a "url" column header and "open-url" markup.
        response = self.app.get('/results?project=test_project')
        self.assertEqual(response.status_code, 200)
        for marker in (b'<th>url</th>', b'open-url'):
            self.assertIn(marker, response.data)
337
338
    def test_a30_export_json(self):
        # The JSON dump of the results contains a "taskid" key.
        response = self.app.get('/results/dump/test_project.json')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'"taskid":', response.data)
342
343
    def test_a32_export_json_style_full(self):
        # With style=full the dump parses as one JSON document holding more
        # than one entry.
        response = self.app.get('/results/dump/test_project.json?style=full')
        self.assertEqual(response.status_code, 200)
        body = response.data.decode('utf8')
        entries = json.loads(body)
        self.assertGreater(len(entries), 1)
348
349
    def test_a34_export_json_style_full_limit_1(self):
        # With limit=1 and offset=1 the full-style dump holds exactly one entry.
        url = '/results/dump/test_project.json?style=full&limit=1&offset=1'
        response = self.app.get(url)
        self.assertEqual(response.status_code, 200)
        body = response.data.decode('utf8')
        entries = json.loads(body)
        self.assertEqual(len(entries), 1)
354
355
    def test_a40_export_url_json(self):
        # The .txt dump of the results contains a "url" key.
        response = self.app.get('/results/dump/test_project.txt')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'"url":', response.data)
359
360
    def test_a50_export_csv(self):
        # The CSV dump contains the column sequence "url,title,url".
        response = self.app.get('/results/dump/test_project.csv')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'url,title,url', response.data)
364
365 View Code Duplication
    def test_a60_fetch_via_cannot_connect_fetcher(self):
        # Build a separate webui whose fetcher RPC points at port 20000
        # (presumably nothing listens there -- confirm). A debug run then
        # yields logs and no follow-up tasks.
        webui_ctx = run.webui.make_context('webui', [
            '--fetcher-rpc', 'http://localhost:20000/',
        ], self.ctx)
        client = run.webui.invoke(webui_ctx).test_client()
        form = {
            'script': self.script_content,
            'task': self.task_content
        }
        response = client.post('/debug/test_project/run', data=form)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertGreater(len(result['logs']), 0)
        self.assertEqual(len(result['follows']), 0)
379
380 View Code Duplication
    def test_a70_fetch_via_fetcher(self):
        # Build a separate webui whose fetcher RPC points at port 24444, the
        # xmlrpc port the fixture's fetcher was started with. A debug run then
        # yields no logs and at least one follow-up task.
        webui_ctx = run.webui.make_context('webui', [
            '--fetcher-rpc', 'http://localhost:24444/',
        ], self.ctx)
        client = run.webui.invoke(webui_ctx).test_client()
        form = {
            'script': self.script_content,
            'task': self.task_content
        }
        response = client.post('/debug/test_project/run', data=form)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertEqual(len(result['logs']), 0, result['logs'])
        self.assertIn(b'follows', response.data)
        self.assertGreater(len(result['follows']), 0)
395
396
    def test_h000_auth(self):
        # Replace the class-level client and RPC handle with those of a webui
        # configured with a username and password; later tests use them.
        webui_args = [
            '--scheduler-rpc', 'http://localhost:23333/',
            '--username', 'binux',
            '--password', '4321',
        ]
        webui_ctx = run.webui.make_context('webui', webui_args, self.ctx)
        webui = run.webui.invoke(webui_ctx)
        cls = self.__class__
        cls.app = webui.test_client()
        cls.rpc = webui.config['scheduler_rpc']
405
406
    def test_h010_change_group(self):
        # Move the project into the group "lock" without credentials, then
        # check "lock" appears on the index page.
        form = {
            'name': 'group',
            'value': 'lock',
            'pk': 'test_project'
        }
        update_response = self.app.post('/update', data=form)
        self.assertEqual(update_response.status_code, 200)
        self.assertIn(b'ok', update_response.data)

        index_response = self.app.get('/')
        self.assertEqual(index_response.status_code, 200)
        self.assertIn(b'lock', index_response.data)
418
419
    def test_h020_change_group_lock_failed(self):
        # Changing the group again without credentials is rejected with 401.
        form = {
            'name': 'group',
            'value': '',
            'pk': 'test_project'
        }
        response = self.app.post('/update', data=form)
        self.assertEqual(response.status_code, 401)
426
427
    def test_h020_change_group_lock_ok(self):
        # The same kind of update succeeds when HTTP basic credentials are
        # sent; the header value is the base64 form of "binux:4321".
        form = {
            'name': 'group',
            'value': 'test_binux',
            'pk': 'test_project'
        }
        credentials = {
            'Authorization': 'Basic YmludXg6NDMyMQ=='
        }
        response = self.app.post('/update', data=form, headers=credentials)
        self.assertEqual(response.status_code, 200)
436
437
    def test_h030_need_auth(self):
        # Replace the class-level client and RPC handle with those of a webui
        # started with --need-auth on top of the username and password.
        webui_args = [
            '--scheduler-rpc', 'http://localhost:23333/',
            '--username', 'binux',
            '--password', '4321',
            '--need-auth',
        ]
        webui_ctx = run.webui.make_context('webui', webui_args, self.ctx)
        webui = run.webui.invoke(webui_ctx)
        cls = self.__class__
        cls.app = webui.test_client()
        cls.rpc = webui.config['scheduler_rpc']
447
448
    def test_h040_auth_fail(self):
        # Without any credentials the index page answers 401.
        response = self.app.get('/')
        self.assertEqual(response.status_code, 401)
451
452
    def test_h050_auth_fail2(self):
        # A garbage Authorization header value is rejected with 401.
        credentials = {
            'Authorization': 'Basic Ymlasdfsd'
        }
        response = self.app.get('/', headers=credentials)
        self.assertEqual(response.status_code, 401)
457
458
    def test_h060_auth_fail3(self):
        # A well-formed header with the wrong password (the value is the base64
        # form of "binux:1") is rejected with 401.
        credentials = {
            'Authorization': 'Basic YmludXg6MQ=='
        }
        response = self.app.get('/', headers=credentials)
        self.assertEqual(response.status_code, 401)
463
464
    def test_h070_auth_ok(self):
        # The correct credentials (base64 of "binux:4321") are accepted.
        credentials = {
            'Authorization': 'Basic YmludXg6NDMyMQ=='
        }
        response = self.app.get('/', headers=credentials)
        self.assertEqual(response.status_code, 200)
469
470
    def test_x0_disconnected_scheduler(self):
        # Replace the class-level client and RPC handle with those of a webui
        # whose scheduler RPC points at port 23458 (presumably nothing listens
        # there -- confirm); the remaining tests run against it.
        webui_args = [
            '--scheduler-rpc', 'http://localhost:23458/'
        ]
        webui_ctx = run.webui.make_context('webui', webui_args, self.ctx)
        webui = run.webui.invoke(webui_ctx)
        cls = self.__class__
        cls.app = webui.test_client()
        cls.rpc = webui.config['scheduler_rpc']
477
478
    def test_x10_project_update(self):
        # With the client swapped in by test_x0, a status update still answers
        # 200 but the body no longer contains "ok".
        form = {
            'name': 'status',
            'value': 'RUNNING',
            'pk': 'test_project'
        }
        response = self.app.post('/update', data=form)
        self.assertEqual(response.status_code, 200)
        self.assertNotIn(b'ok', response.data)
486
487
    def test_x20_counter(self):
        # With the client swapped in by test_x0, the counter endpoint answers
        # 200 with an empty JSON object.
        response = self.app.get('/counter?time=5m&type=sum')
        self.assertEqual(response.status_code, 200)
        counters = json.loads(utils.text(response.data))
        self.assertEqual(counters, {})
491
492
    def test_x30_run(self):
        # With the client swapped in by test_x0, POST /run answers 200 but
        # reports result == False.
        form = {
            'project': 'test_project',
        }
        response = self.app.post('/run', data=form)
        self.assertEqual(response.status_code, 200)
        answer = json.loads(utils.text(response.data))
        self.assertEqual(answer['result'], False)
498
499
    def test_x40_debug_save(self):
        # With the client swapped in by test_x0, saving the script answers 200
        # but the body does not contain "ok".
        form = {
            'script': self.script_content,
        }
        response = self.app.post('/debug/test_project/save', data=form)
        self.assertEqual(response.status_code, 200)
        self.assertNotIn(b'ok', response.data)
505
506
    def test_x50_tasks(self):
        # With the client swapped in by test_x0, the task list answers 502.
        response = self.app.get('/tasks')
        self.assertEqual(response.status_code, 502)
509