TestWebUI.test_80_change_group()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 12
rs 9.4285
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- encoding: utf-8 -*-
3
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
4
# Author: Binux<[email protected]>
5
#         http://binux.me
6
# Created on 2014-11-18 21:03:22
7
8
import os
9
import re
10
import time
11
import json
12
import shutil
13
import unittest2 as unittest
14
15
from pyspider import run
16
from pyspider.libs import utils
17
from pyspider.libs.utils import run_in_thread, ObjectDict
18
19
20
class TestWebUI(unittest.TestCase):
21
22
    @classmethod
    def setUpClass(cls):
        """Start the services under test and build the webui test client.

        Recreates ``./data/tests``, launches httpbin on port 14887 through
        ``utils.run_in_subprocess``, invokes the pyspider CLI in testing mode
        with sqlite task/project/result databases, runs the scheduler,
        fetcher, processor and result worker in background threads, and
        stores the webui test client and its scheduler RPC handle on the
        class as ``app`` and ``rpc``.
        """
        # Start from an empty data directory so the sqlite files are fresh.
        shutil.rmtree('./data/tests', ignore_errors=True)
        os.makedirs('./data/tests')

        # tests.data_test_webpage and bench_test are not referenced below;
        # presumably they are imported for their import-time side effects.
        import tests.data_test_webpage
        import httpbin
        from pyspider.webui import bench_test  # flake8: noqa
        cls.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887, passthrough_errors=False)
        cls.httpbin = 'http://127.0.0.1:14887'

        ctx = run.cli.make_context('test', [
            '--taskdb', 'sqlalchemy+sqlite+taskdb:///data/tests/task.db',
            '--projectdb', 'sqlalchemy+sqlite+projectdb:///data/tests/projectdb.db',
            '--resultdb', 'sqlalchemy+sqlite+resultdb:///data/tests/resultdb.db',
        ], None, obj=ObjectDict(testing_mode=True))
        cls.ctx = run.cli.invoke(ctx)

        # Every background thread is recorded so tearDownClass can join it.
        cls.threads = []

        ctx = run.scheduler.make_context('scheduler', [], cls.ctx)
        cls.scheduler = scheduler = run.scheduler.invoke(ctx)
        cls.threads.append(run_in_thread(scheduler.xmlrpc_run))
        cls.threads.append(run_in_thread(scheduler.run))

        ctx = run.fetcher.make_context('fetcher', [
            '--xmlrpc',
            '--xmlrpc-port', '24444',
        ], cls.ctx)
        fetcher = run.fetcher.invoke(ctx)
        cls.threads.append(run_in_thread(fetcher.xmlrpc_run))
        cls.threads.append(run_in_thread(fetcher.run))

        ctx = run.processor.make_context('processor', [], cls.ctx)
        processor = run.processor.invoke(ctx)
        cls.threads.append(run_in_thread(processor.run))

        ctx = run.result_worker.make_context('result_worker', [], cls.ctx)
        result_worker = run.result_worker.invoke(ctx)
        cls.threads.append(run_in_thread(result_worker.run))

        ctx = run.webui.make_context('webui', [
            '--scheduler-rpc', 'http://localhost:23333/'
        ], cls.ctx)
        app = run.webui.invoke(ctx)
        app.debug = True
        cls.app = app.test_client()
        cls.rpc = app.config['scheduler_rpc']

        # Presumably gives the background threads time to come up before
        # the first test runs.
        time.sleep(1)
72
73 View Code Duplication
    @classmethod
    def tearDownClass(cls):
        """Shut down everything started in setUpClass and remove test data.

        Quits each instance registered on ``ctx.obj.instances``, joins the
        background threads, terminates the httpbin subprocess, and verifies
        that the service ports are closed.

        Raises:
            AssertionError: if any of the checked ports is still open.
        """
        try:
            for each in cls.ctx.obj.instances:
                each.quit()
            time.sleep(1)

            for thread in cls.threads:
                thread.join()

            cls.httpbin_thread.terminate()
            cls.httpbin_thread.join()

            # 23333, 24444 and 14887 are the scheduler RPC, fetcher xmlrpc and
            # httpbin ports used in setUpClass; 5000 and 25555 are presumably
            # defaults of other components -- TODO confirm.
            ports = (5000, 23333, 24444, 25555, 14887)
            still_open = [port for port in ports if utils.check_port_open(port)]
            # Raised explicitly so the check survives ``python -O`` and the
            # failure names the offending ports.
            if still_open:
                raise AssertionError('ports still open after shutdown: %r' % (still_open,))
        finally:
            # Always remove the data directory, even when shutdown fails.
            shutil.rmtree('./data/tests', ignore_errors=True)
92
93
    def test_10_index_page(self):
        """The index page answers 200 and contains the text ``dashboard``."""
        response = self.app.get('/')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'dashboard', response.data)
97
98
    def test_20_debug(self):
        """The debug page for ``test_project`` embeds task and script JSON."""
        response = self.app.get('/debug/test_project')
        self.assertEqual(response.status_code, 200)
        for marker in (b'debugger', b'var task_content = ', b'var script_content = '):
            self.assertIn(marker, response.data)

        # The embedded task must decode as JSON and mention the project name.
        task_match = re.search(r'var task_content = (.*);\n', utils.text(response.data))
        self.assertIsNotNone(task_match)
        self.assertIn('test_project', json.loads(task_match.group(1)))

        # The embedded script must decode as JSON and contain the placeholder.
        script_match = re.search(r'var script_content = (.*);\n', utils.text(response.data))
        self.assertIsNotNone(script_match)
        self.assertIn('__START_URL__', json.loads(script_match.group(1)))
112
113 View Code Duplication
    def test_25_debug_post(self):
        """Posting project settings to the debug page returns the debugger.

        The decoded task and script are stored on the class as
        ``task_content`` and ``script_content``.
        """
        form = {
            'project-name': 'other_project',
            'start-urls': 'http://127.0.0.1:14887/pyspider/test.html',
            'script-mode': 'script',
        }
        response = self.app.post('/debug/test_project', data=form)
        self.assertEqual(response.status_code, 200)
        for marker in (b'debugger', b'var task_content = ', b'var script_content = '):
            self.assertIn(marker, response.data)

        task_match = re.search(r'var task_content = (.*);\n', utils.text(response.data))
        self.assertIsNotNone(task_match)
        raw_task = task_match.group(1)
        self.assertIn('test_project', raw_task)
        self.__class__.task_content = json.loads(raw_task)

        script_match = re.search(r'var script_content = (.*);\n', utils.text(response.data))
        self.assertIsNotNone(script_match)
        raw_script = script_match.group(1)
        self.assertIn('127.0.0.1:14887', raw_script)
        self.__class__.script_content = json.loads(raw_script)
133
134 View Code Duplication
    def test_30_run(self):
        """Running the stored script and task yields at least one follow.

        The first follow is stored on the class as ``task_content2``.
        """
        payload = {
            'script': self.script_content,
            'task': self.task_content,
        }
        response = self.app.post('/debug/test_project/run', data=payload)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertIn(b'follows', response.data)
        self.assertGreater(len(result['follows']), 0)
        self.__class__.task_content2 = result['follows'][0]
144
145
    def test_32_run_bad_task(self):
        """A task with garbage appended produces logs and no follows."""
        payload = {
            'script': self.script_content,
            'task': self.task_content+'asdfasdf312!@#',
        }
        response = self.app.post('/debug/test_project/run', data=payload)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertGreater(len(result['logs']), 0)
        self.assertEqual(len(result['follows']), 0)
154
155
    def test_33_run_bad_script(self):
        """A script with garbage appended produces logs and no follows."""
        payload = {
            'script': self.script_content+'adfasfasdf',
            'task': self.task_content,
        }
        response = self.app.post('/debug/test_project/run', data=payload)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertGreater(len(result['logs']), 0)
        self.assertEqual(len(result['follows']), 0)
164
165
    def test_35_run_http_task(self):
        """Running the stored follow-up task returns JSON with ``follows``."""
        payload = {
            'script': self.script_content,
            'task': json.dumps(self.task_content2),
        }
        response = self.app.post('/debug/test_project/run', data=payload)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertIn('follows', result)
173
174
    def test_40_save(self):
        """Saving the script for ``test_project`` answers 200 with ``ok``."""
        payload = {'script': self.script_content}
        response = self.app.post('/debug/test_project/save', data=payload)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
180
181
    def test_42_get(self):
        """The ``get`` endpoint returns the script stored on the class."""
        response = self.app.get('/debug/test_project/get')
        self.assertEqual(response.status_code, 200)
        project = json.loads(utils.text(response.data))
        self.assertIn('script', project)
        self.assertEqual(project['script'], self.script_content)
187
188 View Code Duplication
    def test_45_run_with_saved_script(self):
        """Running with ``webdav_mode`` and an empty script yields follows.

        The first follow is stored on the class as ``task_content2``.
        """
        payload = {
            'webdav_mode': 'true',
            'script': '',
            'task': self.task_content,
        }
        response = self.app.post('/debug/test_project/run', data=payload)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertIn(b'follows', response.data)
        self.assertGreater(len(result['follows']), 0)
        self.__class__.task_content2 = result['follows'][0]
199
200
    def test_50_index_page_list(self):
        """The index page contains the quoted name ``"test_project"``."""
        response = self.app.get('/')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'"test_project"', response.data)
204
205
    def test_52_change_status(self):
        """Setting the project status to RUNNING answers 200 with ``ok``."""
        update = {
            'name': 'status',
            'value': 'RUNNING',
            'pk': 'test_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
213
214
    def test_55_reopen(self):
        """Reopening the debug page answers 200 and contains ``debugger``."""
        response = self.app.get('/debug/test_project')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'debugger', response.data)
218
219
    def test_57_resave(self):
        """Saving the same script again answers 200 with ``ok``."""
        payload = {'script': self.script_content}
        response = self.app.post('/debug/test_project/save', data=payload)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
225
226
    def test_58_index_page_list(self):
        """The index page contains the text ``CHECKING``."""
        response = self.app.get('/')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'CHECKING', response.data)
230
231
    def test_60_change_rate(self):
        """Setting the project rate to ``1/4`` answers 200 with ``ok``."""
        update = {
            'name': 'rate',
            'value': '1/4',
            'pk': 'test_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
239
240
    def test_70_change_status(self):
        """Setting the project status to RUNNING answers 200 with ``ok``."""
        update = {
            'name': 'status',
            'value': 'RUNNING',
            'pk': 'test_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'ok', response.data)
248
249
    def test_80_change_group(self):
        """Changing the group to ``test_binux`` shows up on the index page."""
        update = {
            'name': 'group',
            'value': 'test_binux',
            'pk': 'test_project',
        }
        update_response = self.app.post('/update', data=update)
        self.assertEqual(update_response.status_code, 200)
        self.assertIn(b'ok', update_response.data)

        index_response = self.app.get('/')
        self.assertEqual(index_response.status_code, 200)
        self.assertIn(b'test_binux', index_response.data)
261
262
    def test_90_run(self):
        """Posting ``test_project`` to ``/run`` reports ``result`` as True."""
        time.sleep(0.5)
        payload = {'project': 'test_project'}
        response = self.app.post('/run', data=payload)
        self.assertEqual(response.status_code, 200)
        outcome = json.loads(utils.text(response.data))
        self.assertEqual(outcome['result'], True)
269
270
    def test_a10_counter(self):
        """``/counter`` reports more than 3 successes in every time window.

        Polls the scheduler RPC once per second, up to 30 times, until the
        5-minute success sum for ``test_project`` exceeds 5.
        """
        for _ in range(30):
            time.sleep(1)
            five_minute_sums = self.rpc.counter('5m', 'sum')
            if five_minute_sums.get('test_project', {}).get('success', 0) > 5:
                break

        response = self.app.get('/counter')
        self.assertEqual(response.status_code, 200)
        counters = json.loads(utils.text(response.data))
        self.assertGreater(len(counters), 0)
        for window in ('5m', '1h', '1d', 'all'):
            self.assertGreater(counters['test_project'][window]['success'], 3)
285
286
    def test_a15_queues(self):
        """``/queues`` returns JSON that names each of the five queues."""
        response = self.app.get('/queues')
        self.assertEqual(response.status_code, 200)
        queues = json.loads(utils.text(response.data))
        self.assertGreater(len(queues), 0)
        expected_names = (
            'scheduler2fetcher',
            'fetcher2processor',
            'processor2result',
            'newtask_queue',
            'status_queue',
        )
        for queue_name in expected_names:
            self.assertIn(queue_name, queues)
296
297 View Code Duplication
    def test_a20_tasks(self):
        """``/tasks`` lists successful tasks and no task marked ERROR.

        The first task link and the first debug link found in the page are
        stored on the class as ``task_url`` and ``debug_task_url``.
        """
        listing = self.app.get('/tasks')
        self.assertEqual(listing.status_code, 200, listing.data)
        self.assertIn(b'SUCCESS</span>', listing.data)
        self.assertNotIn(b'>ERROR</span>', listing.data)

        task_link = re.search(r'/task/test_project:[^"]+', utils.text(listing.data))
        self.assertIsNotNone(task_link)
        self.__class__.task_url = task_link.group(0)
        self.assertIsNotNone(self.task_url)

        debug_link = re.search(r'/debug/test_project[^"]+', utils.text(listing.data))
        self.assertIsNotNone(debug_link)
        self.__class__.debug_task_url = debug_link.group(0)
        self.assertIsNotNone(self.debug_task_url)

        # The same checks must hold when the list is filtered by project.
        filtered = self.app.get('/tasks?project=test_project')
        self.assertEqual(filtered.status_code, 200)
        self.assertIn(b'SUCCESS</span>', filtered.data)
        self.assertNotIn(b'>ERROR</span>', filtered.data)
315
316
    def test_a22_active_tasks(self):
        """``/active_tasks`` returns well-formed tasks, at least one tracked.

        Every task must carry the basic keys; every task that has a truthy
        ``track`` must record ``ok`` and ``time`` for both fetch and process.
        """
        response = self.app.get('/active_tasks')
        tasks = json.loads(utils.text(response.data))
        saw_track = False
        self.assertGreater(len(tasks), 0)
        for task in tasks:
            for key in ('taskid', 'project', 'url', 'updatetime'):
                self.assertIn(key, task)
            if not task.get('track'):
                continue
            saw_track = True
            for stage in ('fetch', 'process'):
                self.assertIn(stage, task['track'])
                for field in ('ok', 'time'):
                    self.assertIn(field, task['track'][stage])
        self.assertTrue(saw_track)
333
                    
334
335
    def test_a24_task(self):
        """The stored task URL answers 200 and contains ``lastcrawltime``."""
        response = self.app.get(self.task_url)
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'lastcrawltime', response.data)
339
340
    def test_a25_task_json(self):
        """The ``.json`` form of the task URL includes ``status_string``."""
        response = self.app.get(self.task_url + '.json')
        self.assertEqual(response.status_code, 200)
        task = json.loads(utils.text(response.data))
        self.assertIn('status_string', task)
344
345
    def test_a26_debug_task(self):
        """The stored debug-task URL answers 200."""
        response = self.app.get(self.debug_task_url)
        self.assertEqual(response.status_code, 200)
348
349
    def test_a30_results(self):
        """The results page has a ``url`` column header and ``open-url``."""
        response = self.app.get('/results?project=test_project')
        self.assertEqual(response.status_code, 200)
        for marker in (b'<th>url</th>', b'open-url'):
            self.assertIn(marker, response.data)
354
355
    def test_a30_export_json(self):
        """The JSON results dump contains a ``"taskid":`` key."""
        response = self.app.get('/results/dump/test_project.json')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'"taskid":', response.data)
359
360
    def test_a32_export_json_style_full(self):
        """The ``style=full`` JSON dump decodes to more than one item."""
        response = self.app.get('/results/dump/test_project.json?style=full')
        self.assertEqual(response.status_code, 200)
        dumped = json.loads(response.data.decode('utf8'))
        self.assertGreater(len(dumped), 1)
365
366
    def test_a34_export_json_style_full_limit_1(self):
        """With ``limit=1&offset=1`` the full JSON dump has exactly one item."""
        url = '/results/dump/test_project.json?style=full&limit=1&offset=1'
        response = self.app.get(url)
        self.assertEqual(response.status_code, 200)
        dumped = json.loads(response.data.decode('utf8'))
        self.assertEqual(len(dumped), 1)
371
372
    def test_a40_export_url_json(self):
        """The ``.txt`` results dump contains a ``"url":`` key."""
        response = self.app.get('/results/dump/test_project.txt')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'"url":', response.data)
376
377
    def test_a50_export_csv(self):
        """The CSV results dump contains the text ``url,title,url``."""
        response = self.app.get('/results/dump/test_project.csv')
        self.assertEqual(response.status_code, 200)
        self.assertIn(b'url,title,url', response.data)
381
382 View Code Duplication
    def test_a60_fetch_via_cannot_connect_fetcher(self):
        """A webui whose fetcher RPC is on port 20000 gets logs, no follows.

        Uses a separate webui instance; the class-level ``app`` is untouched.
        """
        webui_ctx = run.webui.make_context('webui', [
            '--fetcher-rpc', 'http://localhost:20000/',
        ], self.ctx)
        client = run.webui.invoke(webui_ctx).test_client()
        payload = {
            'script': self.script_content,
            'task': self.task_content,
        }
        response = client.post('/debug/test_project/run', data=payload)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        self.assertGreater(len(result['logs']), 0)
        self.assertEqual(len(result['follows']), 0)
396
397 View Code Duplication
    def test_a70_fetch_via_fetcher(self):
        """A webui using the fetcher RPC on port 24444 gets follows, no logs.

        Uses a separate webui instance; the class-level ``app`` is untouched.
        """
        webui_ctx = run.webui.make_context('webui', [
            '--fetcher-rpc', 'http://localhost:24444/',
        ], self.ctx)
        client = run.webui.invoke(webui_ctx).test_client()
        payload = {
            'script': self.script_content,
            'task': self.task_content,
        }
        response = client.post('/debug/test_project/run', data=payload)
        self.assertEqual(response.status_code, 200)
        result = json.loads(utils.text(response.data))
        # The logs are passed as the failure message so they show on failure.
        self.assertEqual(len(result['logs']), 0, result['logs'])
        self.assertIn(b'follows', response.data)
        self.assertGreater(len(result['follows']), 0)
412
413
    def test_h000_auth(self):
        """Replace the class-level client with a webui that has credentials.

        The new webui is configured with username ``binux`` and password
        ``4321``; ``app`` and ``rpc`` on the class are rebound to it.
        """
        options = [
            '--scheduler-rpc', 'http://localhost:23333/',
            '--username', 'binux',
            '--password', '4321',
        ]
        webui_app = run.webui.invoke(run.webui.make_context('webui', options, self.ctx))
        self.__class__.app = webui_app.test_client()
        self.__class__.rpc = webui_app.config['scheduler_rpc']
422
423
    def test_h005_no_such_project(self):
        """Updating ``not_exist_project`` answers 404."""
        update = {
            'name': 'group',
            'value': 'lock',
            'pk': 'not_exist_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 404)
430
431
    def test_h005_unknown_field(self):
        """Updating the field ``unknown_field`` answers 400."""
        update = {
            'name': 'unknown_field',
            'value': 'lock',
            'pk': 'test_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 400)
438
439
    def test_h005_rate_wrong_format(self):
        """Setting the rate to ``xxx`` answers 400."""
        update = {
            'name': 'rate',
            'value': 'xxx',
            'pk': 'test_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 400)
446
447
    def test_h010_change_group(self):
        """Changing the group to ``lock`` shows up on the index page."""
        update = {
            'name': 'group',
            'value': 'lock',
            'pk': 'test_project',
        }
        update_response = self.app.post('/update', data=update)
        self.assertEqual(update_response.status_code, 200)
        self.assertIn(b'ok', update_response.data)

        index_response = self.app.get('/')
        self.assertEqual(index_response.status_code, 200)
        self.assertIn(b'lock', index_response.data)
459
460
    def test_h020_change_group_lock_failed(self):
        """Clearing the group without an Authorization header answers 401."""
        update = {
            'name': 'group',
            'value': '',
            'pk': 'test_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 401)
467
468
    def test_h020_change_group_lock_ok(self):
        """Changing the group with valid Basic credentials answers 200."""
        update = {
            'name': 'group',
            'value': 'test_binux',
            'pk': 'test_project',
        }
        # The token is the base64 encoding of "binux:4321".
        credentials = {'Authorization': 'Basic YmludXg6NDMyMQ=='}
        response = self.app.post('/update', data=update, headers=credentials)
        self.assertEqual(response.status_code, 200)
477
478
    def test_h030_need_auth(self):
        """Replace the class-level client with a webui run with --need-auth.

        The new webui keeps username ``binux`` and password ``4321``;
        ``app`` and ``rpc`` on the class are rebound to it.
        """
        options = [
            '--scheduler-rpc', 'http://localhost:23333/',
            '--username', 'binux',
            '--password', '4321',
            '--need-auth',
        ]
        webui_app = run.webui.invoke(run.webui.make_context('webui', options, self.ctx))
        self.__class__.app = webui_app.test_client()
        self.__class__.rpc = webui_app.config['scheduler_rpc']
488
489
    def test_h040_auth_fail(self):
        """The index page without an Authorization header answers 401."""
        response = self.app.get('/')
        self.assertEqual(response.status_code, 401)
492
493
    def test_h050_auth_fail2(self):
        """The index page with the token ``Ymlasdfsd`` answers 401."""
        credentials = {'Authorization': 'Basic Ymlasdfsd'}
        response = self.app.get('/', headers=credentials)
        self.assertEqual(response.status_code, 401)
498
499
    def test_h060_auth_fail3(self):
        """The index page with a wrong password answers 401."""
        # The token is the base64 encoding of "binux:1".
        credentials = {'Authorization': 'Basic YmludXg6MQ=='}
        response = self.app.get('/', headers=credentials)
        self.assertEqual(response.status_code, 401)
504
505
    def test_h070_auth_ok(self):
        """The index page with valid Basic credentials answers 200."""
        # The token is the base64 encoding of "binux:4321".
        credentials = {'Authorization': 'Basic YmludXg6NDMyMQ=='}
        response = self.app.get('/', headers=credentials)
        self.assertEqual(response.status_code, 200)
510
511
    def test_x0_disconnected_scheduler(self):
        """Replace the class-level client with a webui using port 23458.

        The scheduler RPC URL is ``http://localhost:23458/``, not the port
        23333 used by the other webui instances in this class; ``app`` and
        ``rpc`` on the class are rebound to the new webui.
        """
        options = ['--scheduler-rpc', 'http://localhost:23458/']
        webui_app = run.webui.invoke(run.webui.make_context('webui', options, self.ctx))
        self.__class__.app = webui_app.test_client()
        self.__class__.rpc = webui_app.config['scheduler_rpc']
518
519
    def test_x10_project_update(self):
        """A status update answers 200 but the body does not contain ``ok``."""
        update = {
            'name': 'status',
            'value': 'RUNNING',
            'pk': 'test_project',
        }
        response = self.app.post('/update', data=update)
        self.assertEqual(response.status_code, 200)
        self.assertNotIn(b'ok', response.data)
527
528
    def test_x20_counter(self):
        """``/counter?time=5m&type=sum`` answers 200 with an empty object."""
        response = self.app.get('/counter?time=5m&type=sum')
        self.assertEqual(response.status_code, 200)
        counters = json.loads(utils.text(response.data))
        self.assertEqual(counters, {})
532
533
    def test_x30_run_not_exists_project(self):
        """Posting ``not_exist_project`` to ``/run`` answers 404."""
        payload = {'project': 'not_exist_project'}
        response = self.app.post('/run', data=payload)
        self.assertEqual(response.status_code, 404)
538
539
    def test_x30_run(self):
        """Posting ``test_project`` to ``/run`` reports ``result`` as False."""
        payload = {'project': 'test_project'}
        response = self.app.post('/run', data=payload)
        self.assertEqual(response.status_code, 200)
        outcome = json.loads(utils.text(response.data))
        self.assertEqual(outcome['result'], False)
545
546
    def test_x40_debug_save(self):
        """Saving the script answers 200 but the body does not contain ``ok``."""
        payload = {'script': self.script_content}
        response = self.app.post('/debug/test_project/save', data=payload)
        self.assertEqual(response.status_code, 200)
        self.assertNotIn(b'ok', response.data)
552
553
    def test_x50_tasks(self):
        """``/tasks`` answers 502."""
        response = self.app.get('/tasks')
        self.assertEqual(response.status_code, 502)
556
557
    def test_x60_robots(self):
        """``/robots.txt`` answers 200 and contains ``ser-agent``."""
        response = self.app.get('/robots.txt')
        self.assertEqual(response.status_code, 200)
        # Matching without the first letter accepts either capitalisation
        # of "User-agent".
        self.assertIn(b'ser-agent', response.data)
561
562
    def test_x70_bench(self):
563
        rv = self.app.get('/bench?total=10&show=5')
564
        self.assertEqual(rv.status_code, 200)
565