Completed
Push — sqlalchemy_UnicodeEncodeError ( 1be63e )
by Roy
01:10
created

TestWebUI.test_h050_auth_fail2()   A

Complexity

Conditions 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 5
rs 9.4285
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2014-11-18 21:03:22
7
8
import os
9
import re
10
import time
11
import json
12
import shutil
13
import unittest2 as unittest
14
15
from pyspider import run
16
from pyspider.libs import utils
17
from pyspider.libs.utils import run_in_thread, ObjectDict
18
19
20
class TestWebUI(unittest.TestCase):
21
22
    @classmethod
23
    def setUpClass(self):
24
        shutil.rmtree('./data/tests', ignore_errors=True)
25
        os.makedirs('./data/tests')
26
27
        import tests.data_test_webpage
28
        import httpbin
29
        from pyspider.webui import bench_test  # flake8: noqa
30
        self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887, passthrough_errors=False)
31
        self.httpbin = 'http://127.0.0.1:14887'
32
33
        ctx = run.cli.make_context('test', [
34
            '--taskdb', 'sqlalchemy+sqlite+taskdb:///data/tests/task.db',
35
            '--projectdb', 'sqlalchemy+sqlite+projectdb:///data/tests/projectdb.db',
36
            '--resultdb', 'sqlalchemy+sqlite+resultdb:///data/tests/resultdb.db',
37
        ], None, obj=ObjectDict(testing_mode=True))
38
        self.ctx = run.cli.invoke(ctx)
39
40
        self.threads = []
41
42
        ctx = run.scheduler.make_context('scheduler', [], self.ctx)
43
        self.scheduler = scheduler = run.scheduler.invoke(ctx)
44
        self.threads.append(run_in_thread(scheduler.xmlrpc_run))
45
        self.threads.append(run_in_thread(scheduler.run))
46
47
        ctx = run.fetcher.make_context('fetcher', [
48
            '--xmlrpc',
49
            '--xmlrpc-port', '24444',
50
        ], self.ctx)
51
        fetcher = run.fetcher.invoke(ctx)
52
        self.threads.append(run_in_thread(fetcher.xmlrpc_run))
53
        self.threads.append(run_in_thread(fetcher.run))
54
55
        ctx = run.processor.make_context('processor', [], self.ctx)
56
        processor = run.processor.invoke(ctx)
57
        self.threads.append(run_in_thread(processor.run))
58
59
        ctx = run.result_worker.make_context('result_worker', [], self.ctx)
60
        result_worker = run.result_worker.invoke(ctx)
61
        self.threads.append(run_in_thread(result_worker.run))
62
63
        ctx = run.webui.make_context('webui', [
64
            '--scheduler-rpc', 'http://localhost:23333/'
65
        ], self.ctx)
66
        app = run.webui.invoke(ctx)
67
        app.debug = True
68
        self.app = app.test_client()
69
        self.rpc = app.config['scheduler_rpc']
70
71
        time.sleep(1)
72 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
73
    @classmethod
74
    def tearDownClass(self):
75
        for each in self.ctx.obj.instances:
76
            each.quit()
77
        time.sleep(1)
78
79
        for thread in self.threads:
80
            thread.join()
81
82
        self.httpbin_thread.terminate()
83
        self.httpbin_thread.join()
84
85
        assert not utils.check_port_open(5000)
86
        assert not utils.check_port_open(23333)
87
        assert not utils.check_port_open(24444)
88
        assert not utils.check_port_open(25555)
89
        assert not utils.check_port_open(14887)
90
91
        shutil.rmtree('./data/tests', ignore_errors=True)
92
93
    def test_10_index_page(self):
94
        rv = self.app.get('/')
95
        self.assertEqual(rv.status_code, 200)
96
        self.assertIn(b'dashboard', rv.data)
97
98
    def test_20_debug(self):
99
        rv = self.app.get('/debug/test_project')
100
        self.assertEqual(rv.status_code, 200)
101
        self.assertIn(b'debugger', rv.data)
102
        self.assertIn(b'var task_content = ', rv.data)
103
        self.assertIn(b'var script_content = ', rv.data)
104
105
        m = re.search(r'var task_content = (.*);\n', utils.text(rv.data))
106
        self.assertIsNotNone(m)
107
        self.assertIn('test_project', json.loads(m.group(1)))
108
109
        m = re.search(r'var script_content = (.*);\n', utils.text(rv.data))
110
        self.assertIsNotNone(m)
111
        self.assertIn('__START_URL__', json.loads(m.group(1)))
112 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
113
    def test_25_debug_post(self):
114
        rv = self.app.post('/debug/test_project', data={
115
            'project-name': 'other_project',
116
            'start-urls': 'http://127.0.0.1:14887/pyspider/test.html',
117
            'script-mode': 'script',
118
        })
119
        self.assertEqual(rv.status_code, 200)
120
        self.assertIn(b'debugger', rv.data)
121
        self.assertIn(b'var task_content = ', rv.data)
122
        self.assertIn(b'var script_content = ', rv.data)
123
124
        m = re.search(r'var task_content = (.*);\n', utils.text(rv.data))
125
        self.assertIsNotNone(m)
126
        self.assertIn('test_project', m.group(1))
127
        self.__class__.task_content = json.loads(m.group(1))
128
129
        m = re.search(r'var script_content = (.*);\n', utils.text(rv.data))
130
        self.assertIsNotNone(m)
131
        self.assertIn('127.0.0.1:14887', m.group(1))
132
        self.__class__.script_content = json.loads(m.group(1))
133 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
134
    def test_30_run(self):
135
        rv = self.app.post('/debug/test_project/run', data={
136
            'script': self.script_content,
137
            'task': self.task_content
138
        })
139
        self.assertEqual(rv.status_code, 200)
140
        data = json.loads(utils.text(rv.data))
141
        self.assertIn(b'follows', rv.data)
142
        self.assertGreater(len(data['follows']), 0)
143
        self.__class__.task_content2 = data['follows'][0]
144
145
    def test_32_run_bad_task(self):
146
        rv = self.app.post('/debug/test_project/run', data={
147
            'script': self.script_content,
148
            'task': self.task_content+'asdfasdf312!@#'
149
        })
150
        self.assertEqual(rv.status_code, 200)
151
        data = json.loads(utils.text(rv.data))
152
        self.assertGreater(len(data['logs']), 0)
153
        self.assertEqual(len(data['follows']), 0)
154
155
    def test_33_run_bad_script(self):
156
        rv = self.app.post('/debug/test_project/run', data={
157
            'script': self.script_content+'adfasfasdf',
158
            'task': self.task_content
159
        })
160
        self.assertEqual(rv.status_code, 200)
161
        data = json.loads(utils.text(rv.data))
162
        self.assertGreater(len(data['logs']), 0)
163
        self.assertEqual(len(data['follows']), 0)
164
165
    def test_35_run_http_task(self):
166
        rv = self.app.post('/debug/test_project/run', data={
167
            'script': self.script_content,
168
            'task': json.dumps(self.task_content2)
169
        })
170
        self.assertEqual(rv.status_code, 200)
171
        data = json.loads(utils.text(rv.data))
172
        self.assertIn('follows', data)
173
174
    def test_39_save_with_chinese_characters(self):
175
        rv = self.app.post('/debug/test_project/save', data={
176
            'script': self.script_content + '\n#中文',
177
        })
178
        self.assertEqual(rv.status_code, 200)
179
        self.assertIn(b'ok', rv.data)
180
181
    def test_40_save(self):
182
        rv = self.app.post('/debug/test_project/save', data={
183
            'script': self.script_content,
184
        })
185
        self.assertEqual(rv.status_code, 200)
186
        self.assertIn(b'ok', rv.data)
187 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
188
    def test_42_get(self):
189
        rv = self.app.get('/debug/test_project/get')
190
        self.assertEqual(rv.status_code, 200)
191
        data = json.loads(utils.text(rv.data))
192
        self.assertIn('script', data)
193
        self.assertEqual(data['script'], self.script_content)
194
195
    def test_45_run_with_saved_script(self):
196
        rv = self.app.post('/debug/test_project/run', data={
197
            'webdav_mode': 'true',
198
            'script': '',
199
            'task': self.task_content
200
        })
201
        self.assertEqual(rv.status_code, 200)
202
        data = json.loads(utils.text(rv.data))
203
        self.assertIn(b'follows', rv.data)
204
        self.assertGreater(len(data['follows']), 0)
205
        self.__class__.task_content2 = data['follows'][0]
206
207
    def test_50_index_page_list(self):
208
        rv = self.app.get('/')
209
        self.assertEqual(rv.status_code, 200)
210
        self.assertIn(b'"test_project"', rv.data)
211
212
    def test_52_change_status(self):
213
        rv = self.app.post('/update', data={
214
            'name': 'status',
215
            'value': 'RUNNING',
216
            'pk': 'test_project'
217
        })
218
        self.assertEqual(rv.status_code, 200)
219
        self.assertIn(b'ok', rv.data)
220
221
    def test_55_reopen(self):
222
        rv = self.app.get('/debug/test_project')
223
        self.assertEqual(rv.status_code, 200)
224
        self.assertIn(b'debugger', rv.data)
225
226
    def test_57_resave(self):
227
        rv = self.app.post('/debug/test_project/save', data={
228
            'script': self.script_content,
229
        })
230
        self.assertEqual(rv.status_code, 200)
231
        self.assertIn(b'ok', rv.data)
232
233
    def test_58_index_page_list(self):
234
        rv = self.app.get('/')
235
        self.assertEqual(rv.status_code, 200)
236
        self.assertIn(b'CHECKING', rv.data)
237
238
    def test_60_change_rate(self):
239
        rv = self.app.post('/update', data={
240
            'name': 'rate',
241
            'value': '1/4',
242
            'pk': 'test_project'
243
        })
244
        self.assertEqual(rv.status_code, 200)
245
        self.assertIn(b'ok', rv.data)
246
247
    def test_70_change_status(self):
248
        rv = self.app.post('/update', data={
249
            'name': 'status',
250
            'value': 'RUNNING',
251
            'pk': 'test_project'
252
        })
253
        self.assertEqual(rv.status_code, 200)
254
        self.assertIn(b'ok', rv.data)
255
256
    def test_80_change_group(self):
257
        rv = self.app.post('/update', data={
258
            'name': 'group',
259
            'value': 'test_binux',
260
            'pk': 'test_project'
261
        })
262
        self.assertEqual(rv.status_code, 200)
263
        self.assertIn(b'ok', rv.data)
264
265
        rv = self.app.get('/')
266
        self.assertEqual(rv.status_code, 200)
267
        self.assertIn(b'test_binux', rv.data)
268
269
    def test_90_run(self):
270
        time.sleep(0.5)
271
        rv = self.app.post('/run', data={
272
            'project': 'test_project',
273
        })
274
        self.assertEqual(rv.status_code, 200)
275
        self.assertEqual(json.loads(utils.text(rv.data))['result'], True)
276
277
    def test_a10_counter(self):
278
        for i in range(30):
279
            time.sleep(1)
280
            if self.rpc.counter('5m', 'sum')\
281
                    .get('test_project', {}).get('success', 0) > 5:
282
                break
283
284
        rv = self.app.get('/counter')
285 View Code Duplication
        self.assertEqual(rv.status_code, 200)
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
286
        data = json.loads(utils.text(rv.data))
287
        self.assertGreater(len(data), 0)
288
        self.assertGreater(data['test_project']['5m']['success'], 3)
289
        self.assertGreater(data['test_project']['1h']['success'], 3)
290
        self.assertGreater(data['test_project']['1d']['success'], 3)
291
        self.assertGreater(data['test_project']['all']['success'], 3)
292
293
    def test_a15_queues(self):
294
        rv = self.app.get('/queues')
295
        self.assertEqual(rv.status_code, 200)
296
        data = json.loads(utils.text(rv.data))
297
        self.assertGreater(len(data), 0)
298
        self.assertIn('scheduler2fetcher', data)
299
        self.assertIn('fetcher2processor', data)
300
        self.assertIn('processor2result', data)
301
        self.assertIn('newtask_queue', data)
302
        self.assertIn('status_queue', data)
303
304
    def test_a20_tasks(self):
305
        rv = self.app.get('/tasks')
306
        self.assertEqual(rv.status_code, 200, rv.data)
307
        self.assertIn(b'SUCCESS</span>', rv.data)
308
        self.assertNotIn(b'>ERROR</span>', rv.data)
309
        m = re.search(r'/task/test_project:[^"]+', utils.text(rv.data))
310
        self.assertIsNotNone(m)
311
        self.__class__.task_url = m.group(0)
312
        self.assertIsNotNone(self.task_url)
313
        m = re.search(r'/debug/test_project[^"]+', utils.text(rv.data))
314
        self.assertIsNotNone(m)
315
        self.__class__.debug_task_url = m.group(0)
316
        self.assertIsNotNone(self.debug_task_url)
317
318
        rv = self.app.get('/tasks?project=test_project')
319
        self.assertEqual(rv.status_code, 200)
320
        self.assertIn(b'SUCCESS</span>', rv.data)
321
        self.assertNotIn(b'>ERROR</span>', rv.data)
322
323
    def test_a22_active_tasks(self):
324
        rv = self.app.get('/active_tasks')
325
        data = json.loads(utils.text(rv.data))
326
        track = False
327
        self.assertGreater(len(data), 0)
328
        for task in data:
329
            for k in ('taskid', 'project', 'url', 'updatetime'):
330
                self.assertIn(k, task)
331
            if task.get('track'):
332
                track = True
333
                self.assertIn('fetch', task['track'])
334
                self.assertIn('ok', task['track']['fetch'])
335
                self.assertIn('time', task['track']['fetch'])
336
                self.assertIn('process', task['track'])
337
                self.assertIn('ok', task['track']['process'])
338
                self.assertIn('time', task['track']['process'])
339
        self.assertTrue(track)
340
                    
341
342
    def test_a24_task(self):
343
        rv = self.app.get(self.task_url)
344
        self.assertEqual(rv.status_code, 200)
345
        self.assertIn(b'lastcrawltime', rv.data)
346
347
    def test_a25_task_json(self):
348
        rv = self.app.get(self.task_url + '.json')
349
        self.assertEqual(rv.status_code, 200)
350
        self.assertIn('status_string', json.loads(utils.text(rv.data)))
351
352
    def test_a26_debug_task(self):
353
        rv = self.app.get(self.debug_task_url)
354
        self.assertEqual(rv.status_code, 200)
355
356
    def test_a30_results(self):
357
        rv = self.app.get('/results?project=test_project')
358
        self.assertEqual(rv.status_code, 200)
359
        self.assertIn(b'<th>url</th>', rv.data)
360
        self.assertIn(b'open-url', rv.data)
361
362
    def test_a30_export_json(self):
363
        rv = self.app.get('/results/dump/test_project.json')
364
        self.assertEqual(rv.status_code, 200)
365 View Code Duplication
        self.assertIn(b'"taskid":', rv.data)
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
366
367
    def test_a32_export_json_style_full(self):
368
        rv = self.app.get('/results/dump/test_project.json?style=full')
369
        self.assertEqual(rv.status_code, 200)
370
        data = json.loads(rv.data.decode('utf8'))
371
        self.assertGreater(len(data), 1)
372
373
    def test_a34_export_json_style_full_limit_1(self):
374
        rv = self.app.get('/results/dump/test_project.json?style=full&limit=1&offset=1')
375
        self.assertEqual(rv.status_code, 200)
376
        data = json.loads(rv.data.decode('utf8'))
377
        self.assertEqual(len(data), 1)
378
379
    def test_a40_export_url_json(self):
380 View Code Duplication
        rv = self.app.get('/results/dump/test_project.txt')
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
381
        self.assertEqual(rv.status_code, 200)
382
        self.assertIn(b'"url":', rv.data)
383
384
    def test_a50_export_csv(self):
385
        rv = self.app.get('/results/dump/test_project.csv')
386
        self.assertEqual(rv.status_code, 200)
387
        self.assertIn(b'url,title,url', rv.data)
388
389
    def test_a60_fetch_via_cannot_connect_fetcher(self):
390
        ctx = run.webui.make_context('webui', [
391
            '--fetcher-rpc', 'http://localhost:20000/',
392
        ], self.ctx)
393
        app = run.webui.invoke(ctx)
394
        app = app.test_client()
395
        rv = app.post('/debug/test_project/run', data={
396
            'script': self.script_content,
397
            'task': self.task_content
398
        })
399
        self.assertEqual(rv.status_code, 200)
400
        data = json.loads(utils.text(rv.data))
401
        self.assertGreater(len(data['logs']), 0)
402
        self.assertEqual(len(data['follows']), 0)
403
404
    def test_a70_fetch_via_fetcher(self):
405
        ctx = run.webui.make_context('webui', [
406
            '--fetcher-rpc', 'http://localhost:24444/',
407
        ], self.ctx)
408
        app = run.webui.invoke(ctx)
409
        app = app.test_client()
410
        rv = app.post('/debug/test_project/run', data={
411
            'script': self.script_content,
412
            'task': self.task_content
413
        })
414
        self.assertEqual(rv.status_code, 200)
415
        data = json.loads(utils.text(rv.data))
416
        self.assertEqual(len(data['logs']), 0, data['logs'])
417
        self.assertIn(b'follows', rv.data)
418
        self.assertGreater(len(data['follows']), 0)
419
420
    def test_h000_auth(self):
421
        ctx = run.webui.make_context('webui', [
422
            '--scheduler-rpc', 'http://localhost:23333/',
423
            '--username', 'binux',
424
            '--password', '4321',
425
        ], self.ctx)
426
        app = run.webui.invoke(ctx)
427
        self.__class__.app = app.test_client()
428
        self.__class__.rpc = app.config['scheduler_rpc']
429
430
    def test_h005_no_such_project(self):
431
        rv = self.app.post('/update', data={
432
            'name': 'group',
433
            'value': 'lock',
434
            'pk': 'not_exist_project'
435
        })
436
        self.assertEqual(rv.status_code, 404)
437
438
    def test_h005_unknown_field(self):
439
        rv = self.app.post('/update', data={
440
            'name': 'unknown_field',
441
            'value': 'lock',
442
            'pk': 'test_project'
443
        })
444
        self.assertEqual(rv.status_code, 400)
445
446
    def test_h005_rate_wrong_format(self):
447
        rv = self.app.post('/update', data={
448
            'name': 'rate',
449
            'value': 'xxx',
450
            'pk': 'test_project'
451
        })
452
        self.assertEqual(rv.status_code, 400)
453
454
    def test_h010_change_group(self):
455
        rv = self.app.post('/update', data={
456
            'name': 'group',
457
            'value': 'lock',
458
            'pk': 'test_project'
459
        })
460
        self.assertEqual(rv.status_code, 200)
461
        self.assertIn(b'ok', rv.data)
462
463
        rv = self.app.get('/')
464
        self.assertEqual(rv.status_code, 200)
465
        self.assertIn(b'lock', rv.data)
466
467
    def test_h020_change_group_lock_failed(self):
468
        rv = self.app.post('/update', data={
469
            'name': 'group',
470
            'value': '',
471
            'pk': 'test_project'
472
        })
473
        self.assertEqual(rv.status_code, 401)
474
475
    def test_h020_change_group_lock_ok(self):
476
        rv = self.app.post('/update', data={
477
            'name': 'group',
478
            'value': 'test_binux',
479
            'pk': 'test_project'
480
        }, headers={
481
            'Authorization': 'Basic YmludXg6NDMyMQ=='
482
        })
483
        self.assertEqual(rv.status_code, 200)
484
485
    def test_h030_need_auth(self):
486
        ctx = run.webui.make_context('webui', [
487
            '--scheduler-rpc', 'http://localhost:23333/',
488
            '--username', 'binux',
489
            '--password', '4321',
490
            '--need-auth',
491
        ], self.ctx)
492
        app = run.webui.invoke(ctx)
493
        self.__class__.app = app.test_client()
494
        self.__class__.rpc = app.config['scheduler_rpc']
495
496
    def test_h040_auth_fail(self):
497
        rv = self.app.get('/')
498
        self.assertEqual(rv.status_code, 401)
499
500
    def test_h050_auth_fail2(self):
501
        rv = self.app.get('/', headers={
502
            'Authorization': 'Basic Ymlasdfsd'
503
        })
504
        self.assertEqual(rv.status_code, 401)
505
506
    def test_h060_auth_fail3(self):
507
        rv = self.app.get('/', headers={
508
            'Authorization': 'Basic YmludXg6MQ=='
509
        })
510
        self.assertEqual(rv.status_code, 401)
511
512
    def test_h070_auth_ok(self):
513
        rv = self.app.get('/', headers={
514
            'Authorization': 'Basic YmludXg6NDMyMQ=='
515
        })
516
        self.assertEqual(rv.status_code, 200)
517
518
    def test_x0_disconnected_scheduler(self):
519
        ctx = run.webui.make_context('webui', [
520
            '--scheduler-rpc', 'http://localhost:23458/'
521
        ], self.ctx)
522
        app = run.webui.invoke(ctx)
523
        self.__class__.app = app.test_client()
524
        self.__class__.rpc = app.config['scheduler_rpc']
525
526
    def test_x10_project_update(self):
527
        rv = self.app.post('/update', data={
528
            'name': 'status',
529
            'value': 'RUNNING',
530
            'pk': 'test_project'
531
        })
532
        self.assertEqual(rv.status_code, 200)
533
        self.assertNotIn(b'ok', rv.data)
534
535
    def test_x20_counter(self):
536
        rv = self.app.get('/counter?time=5m&type=sum')
537
        self.assertEqual(rv.status_code, 200)
538
        self.assertEqual(json.loads(utils.text(rv.data)), {})
539
540
    def test_x30_run_not_exists_project(self):
541
        rv = self.app.post('/run', data={
542
            'project': 'not_exist_project',
543
        })
544
        self.assertEqual(rv.status_code, 404)
545
546
    def test_x30_run(self):
547
        rv = self.app.post('/run', data={
548
            'project': 'test_project',
549
        })
550
        self.assertEqual(rv.status_code, 200)
551
        self.assertEqual(json.loads(utils.text(rv.data))['result'], False)
552
553
    def test_x40_debug_save(self):
554
        rv = self.app.post('/debug/test_project/save', data={
555
            'script': self.script_content,
556
        })
557
        self.assertEqual(rv.status_code, 200)
558
        self.assertNotIn(b'ok', rv.data)
559
560
    def test_x50_tasks(self):
561
        rv = self.app.get('/tasks')
562
        self.assertEqual(rv.status_code, 502)
563
564
    def test_x60_robots(self):
565
        rv = self.app.get('/robots.txt')
566
        self.assertEqual(rv.status_code, 200)
567
        self.assertIn(b'ser-agent', rv.data)
568
569
    def test_x70_bench(self):
570
        rv = self.app.get('/bench?total=10&show=5')
571
        self.assertEqual(rv.status_code, 200)
572