|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
# -*- encoding: utf-8 -*- |
|
3
|
|
|
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: |
|
4
|
|
|
# Author: Binux<[email protected]> |
|
5
|
|
|
# http://binux.me |
|
6
|
|
|
# Created on 2012-11-06 11:50:13 |
|
7
|
|
|
|
|
8
|
|
|
import math |
|
9
|
|
|
import logging |
|
10
|
|
|
import hashlib |
|
11
|
|
|
import datetime |
|
12
|
|
|
import socket |
|
13
|
|
|
import base64 |
|
14
|
|
|
import warnings |
|
15
|
|
|
import threading |
|
16
|
|
|
|
|
17
|
|
|
import six |
|
18
|
|
|
from six import iteritems |
|
19
|
|
|
|
|
20
|
|
|
def md5string(x):
    """Return the hexadecimal MD5 digest of ``x``.

    ``x`` is passed through ``utf8`` first, so the digest is computed over
    the bytes that helper returns.
    """
    return hashlib.md5(utf8(x)).hexdigest()
|
21
|
|
|
|
|
22
|
|
|
|
|
23
|
|
|
class ReadOnlyDict(dict):
    """A dict that rejects item assignment.

    Only ``d[key] = value`` is intercepted here; building the dict through
    the normal ``dict`` constructor works as usual.
    """

    def __setitem__(self, key, value):
        # Assignment is refused regardless of the key or value supplied.
        message = "dict is read-only"
        raise Exception(message)
|
28
|
|
|
|
|
29
|
|
|
|
|
30
|
|
|
def getitem(obj, key=0, default=None):
    """Return ``obj[key]``, or ``default`` if the lookup fails.

    With the default ``key=0`` this yields the first element of a sequence.

    :param obj: any object; it does not have to support indexing.
    :param key: index or key to look up.
    :param default: value returned when ``obj[key]`` raises.
    """
    try:
        return obj[key]
    except Exception:
        # Best-effort lookup: any ordinary failure (missing key, bad index,
        # un-subscriptable object) maps to the default. KeyboardInterrupt
        # and SystemExit are deliberately allowed to propagate.
        return default
|
36
|
|
|
|
|
37
|
|
|
|
|
38
|
|
|
def hide_me(tb, g=globals()):
    """Strip leading entries from a traceback chain.

    Walks ``tb`` forward past every entry whose frame globals are not ``g``,
    then past the following run of entries whose frame globals are ``g``,
    and returns the entry reached. If the walk runs off the end of the chain
    (or raises), the original ``tb`` is returned unchanged.

    ``g`` defaults to this module's globals, captured when the function is
    defined.
    """
    original = tb
    current = tb
    try:
        # Pass 1 skips entries that do not use ``g``; pass 2 skips the
        # entries that do.
        for skip_when_g in (False, True):
            while current and (current.tb_frame.f_globals is g) == skip_when_g:
                current = current.tb_next
    except Exception as e:
        logging.exception(e)
        current = original
    return current if current else original
|
52
|
|
|
|
|
53
|
|
|
|
|
54
|
|
|
def run_in_thread(func, *args, **kwargs):
    """Start ``func(*args, **kwargs)`` on a daemon thread.

    Returns the already-started ``Thread`` object.
    """
    from threading import Thread

    worker = Thread(target=func, args=args, kwargs=kwargs)
    worker.daemon = True
    worker.start()
    return worker
|
61
|
|
|
|
|
62
|
|
|
|
|
63
|
|
|
def run_in_subprocess(func, *args, **kwargs):
    """Start ``func(*args, **kwargs)`` in a daemon subprocess.

    Returns the already-started ``multiprocessing.Process`` object.
    """
    from multiprocessing import Process

    process = Process(target=func, args=args, kwargs=kwargs)
    process.daemon = True
    process.start()
    return process
|
70
|
|
|
|
|
71
|
|
|
|
|
72
|
|
|
def format_date(date, gmt_offset=0, relative=True, shorter=False, full_format=False):
    """Format ``date`` (which should be GMT) for display.

    By default a relative time is returned (e.g. "2 minutes ago"). Pass
    ``relative=False`` for an absolute date string, or ``full_format=True``
    to force the full form (e.g. "Jul 10, 1980 at 9:30").

    This is primarily intended for dates in the past. Dates in the future
    fall back to the full format, except that in relative mode a date less
    than one minute ahead is treated as "now" to absorb clock skew.

    Adapted from tornado.

    :param date: a naive ``datetime`` in GMT, or a POSIX timestamp
        (``int``/``float``). Any falsy value yields ``'-'``.
    :param gmt_offset: minutes subtracted from ``date`` to obtain local time.
    :param relative: allow relative phrases such as "3 hours ago".
    :param shorter: omit the "at <time>" suffix where applicable.
    :param full_format: always use the "<month> <day>, <year>" form.
    :returns: the formatted string.
    """

    if not date:
        return '-'
    if isinstance(date, (int, float)):
        date = datetime.datetime.utcfromtimestamp(date)
    now = datetime.datetime.utcnow()
    if date > now:
        # total_seconds() is required here: ``timedelta.seconds`` ignores
        # whole days, so a date one day and ten seconds ahead would wrongly
        # count as "immediate future".
        if relative and (date - now).total_seconds() < 60:
            # Due to clock skew, some things are slightly in the future.
            # Round timestamps in the immediate future down to now in
            # relative mode.
            date = now
        else:
            # Otherwise, future dates always use the full format.
            full_format = True
    local_date = date - datetime.timedelta(minutes=gmt_offset)
    local_now = now - datetime.timedelta(minutes=gmt_offset)
    local_yesterday = local_now - datetime.timedelta(hours=24)
    difference = now - date
    seconds = difference.seconds
    days = difference.days

    fmt = None
    if not full_format:
        # The helper either returns a finished relative phrase or a
        # template (possibly None) that still has to be filled in below.
        finished, result = fix_full_format(days, seconds, relative, shorter, local_date, local_yesterday)
        if finished:
            return result
        fmt = result

    if fmt is None:
        fmt = "%(month_name)s %(day)s, %(year)s" if shorter else \
            "%(month_name)s %(day)s, %(year)s at %(time)s"

    str_time = "%d:%02d" % (local_date.hour, local_date.minute)

    return fmt % {
        "month_name": local_date.strftime('%b'),
        "weekday": local_date.strftime('%A'),
        "day": str(local_date.day),
        "year": str(local_date.year),
        "month": local_date.month,
        "time": str_time
    }
|
131
|
|
|
|
|
132
|
|
|
|
|
133
|
|
|
def fix_full_format(days, seconds, relative, shorter, local_date, local_yesterday):
    """Pick the non-full rendering for a date ``days``/``seconds`` in the past.

    Returns a ``(done, value)`` pair. When ``done`` is true, ``value`` is a
    finished relative phrase (e.g. "5 minutes ago"). Otherwise ``value`` is a
    ``%``-style template using the keys ``time``, ``weekday``, ``month`` and
    ``day``, or ``None`` when none of the short templates applies.
    """
    if relative and days == 0:
        if seconds < 50:
            if seconds <= 1:
                return True, "1 second ago"
            return True, "%(seconds)d seconds ago" % {"seconds": seconds}

        if seconds < 50 * 60:
            minutes = round(seconds / 60.0)
            if minutes <= 1:
                return True, "1 minute ago"
            return True, "%(minutes)d minutes ago" % {"minutes": minutes}

        hours = round(seconds / (60.0 * 60))
        if hours <= 1:
            return True, "1 hour ago"
        return True, "%(hours)d hours ago" % {"hours": hours}

    # Each candidate is a (long form, short form) pair of templates.
    if days == 0:
        return False, "%(time)s"
    if days == 1 and local_date.day == local_yesterday.day and relative:
        choices = ("yesterday at %(time)s", "yesterday")
    elif days < 5:
        choices = ("%(weekday)s at %(time)s", "%(weekday)s")
    elif days < 334:  # 11mo, since confusing for same month last year
        choices = ("%(month)s-%(day)s at %(time)s", "%(month)s-%(day)s")
    else:
        return False, None
    return False, (choices[1] if shorter else choices[0])
|
159
|
|
|
|
|
160
|
|
|
class TimeoutError(Exception):
    """Exception type raised by the ``timeout`` alarm handler in this module."""
|
162
|
|
|
|
|
163
|
|
|
try:
    import signal
    if not hasattr(signal, 'SIGALRM'):
        raise ImportError('signal')

    class timeout(object):
        """
        Time limit of command

        with timeout(3):
            time.sleep(10)

        Implemented with ``signal.alarm``: fractional ``seconds`` are rounded
        up to a whole second, and the limit is disabled (with a warning)
        when entered from a thread other than the main thread.
        """

        def __init__(self, seconds=1, error_message='Timeout'):
            self.seconds = seconds
            self.error_message = error_message
            # SIGALRM handler that was active before __enter__ replaced it.
            self._previous_handler = None

        def handle_timeout(self, signum, frame):
            """SIGALRM handler: raise ``TimeoutError`` with the configured message."""
            raise TimeoutError(self.error_message)

        def __enter__(self):
            if not isinstance(threading.current_thread(), threading._MainThread):
                logging.warning("timeout only works on main thread, are you running pyspider in threads?")
                self.seconds = 0
            if self.seconds:
                # signal.signal returns the handler it replaces; keep it so
                # __exit__ can put it back.
                self._previous_handler = signal.signal(signal.SIGALRM, self.handle_timeout)
                signal.alarm(int(math.ceil(self.seconds)))

        def __exit__(self, type, value, traceback):
            if self.seconds:
                # Cancel the pending alarm first, then restore the previous
                # handler so this context manager leaves no handler behind.
                signal.alarm(0)
                # A previous handler of None means it was not installed from
                # Python and cannot be re-installed through signal.signal.
                if self._previous_handler is not None:
                    signal.signal(signal.SIGALRM, self._previous_handler)
                    self._previous_handler = None

except ImportError:
    warnings.warn("timeout is not supported on your platform.", FutureWarning)

    class timeout(object):
        """
        Time limit of command (for windows)

        No-op stand-in used when ``signal.SIGALRM`` is unavailable; the
        body of the ``with`` block runs without any time limit.
        """

        def __init__(self, seconds=1, error_message='Timeout'):
            pass

        def __enter__(self):
            pass

        def __exit__(self, type, value, traceback):
            pass
|
211
|
|
|
|
|
212
|
|
|
|
|
213
|
|
|
def utf8(string):
    """
    Return ``string`` as utf8-encoded bytes.

    Bytes are returned untouched and text is encoded. Any other object is
    converted to text with ``six.text_type`` first and then encoded.
    """
    if isinstance(string, six.binary_type):
        return string
    if not isinstance(string, six.text_type):
        string = six.text_type(string)
    return string.encode('utf8')
|
225
|
|
|
|
|
226
|
|
|
|
|
227
|
|
|
def text(string, encoding='utf8'):
    """
    Return ``string`` as a unicode text object.

    Bytes are decoded with ``encoding``; text is returned untouched. Any
    other object is converted with ``six.text_type``.
    """
    if isinstance(string, six.binary_type):
        return string.decode(encoding)
    if isinstance(string, six.text_type):
        return string
    return six.text_type(string)
|
239
|
|
|
|
|
240
|
|
|
|
|
241
|
|
|
def pretty_unicode(string):
    """
    Return ``string`` as unicode text.

    Text is returned as is. Anything else is decoded as utf8; if that fails
    with ``UnicodeDecodeError`` the value is decoded as Latin-1 and rendered
    as a unicode-escaped string instead.
    """
    if not isinstance(string, six.text_type):
        try:
            string = string.decode("utf8")
        except UnicodeDecodeError:
            string = string.decode('Latin-1').encode('unicode_escape').decode("utf8")
    return string
|
251
|
|
|
|
|
252
|
|
|
|
|
253
|
|
|
def unicode_string(string):
    """
    Make sure string is unicode: decode it as utf8, or wrap it as base64 if
    that fails.

    The base64 form is ``[BASE64-DATA]<payload>[/BASE64-DATA]`` and can be
    decoded by `decode_unicode_string`.
    """
    if isinstance(string, six.text_type):
        return string
    try:
        return string.decode("utf8")
    except UnicodeDecodeError:
        # b64encode returns bytes on Python 3; decode the (pure ASCII)
        # payload so it can be concatenated with the text markers on both
        # Python 2 and Python 3.
        payload = base64.b64encode(string).decode('ascii')
        return '[BASE64-DATA]' + payload + '[/BASE64-DATA]'
|
265
|
|
|
|
|
266
|
|
|
|
|
267
|
|
|
def unicode_dict(_dict):
    """
    Return a new dict whose keys and values have been passed through
    ``unicode_obj``.
    """
    return {unicode_obj(key): unicode_obj(value) for key, value in iteritems(_dict)}
|
275
|
|
|
|
|
276
|
|
|
|
|
277
|
|
|
def unicode_list(_list):
    """
    Return a new list with every element passed through ``unicode_obj``.
    """
    return list(map(unicode_obj, _list))
|
282
|
|
|
|
|
283
|
|
|
|
|
284
|
|
|
def unicode_obj(obj):
    """
    Make sure keys and values of dict/list/tuple is unicode.

    Dicts go through ``unicode_dict``, lists and tuples through
    ``unicode_list`` (so tuples come back as lists), and values matching
    ``six.string_types`` through ``unicode_string``. Numbers and ``None``
    are returned unchanged. Anything else is converted with ``text``,
    falling back to the text of its ``repr`` if that fails.

    Can been decode by `decode_unicode_obj`
    """
    if isinstance(obj, dict):
        return unicode_dict(obj)
    elif isinstance(obj, (list, tuple)):
        return unicode_list(obj)
    elif isinstance(obj, six.string_types):
        return unicode_string(obj)
    elif isinstance(obj, (int, float)):
        return obj
    elif obj is None:
        return obj
    else:
        try:
            return text(obj)
        except Exception:
            # Best-effort fallback for objects that cannot be converted
            # directly; KeyboardInterrupt/SystemExit still propagate.
            return text(repr(obj))
|
305
|
|
|
|
|
306
|
|
|
|
|
307
|
|
|
def decode_unicode_string(string):
    """
    Reverse the base64 wrapping applied by `unicode_string`.

    A string of the form ``[BASE64-DATA]...[/BASE64-DATA]`` is base64-decoded
    and the decoded data returned; any other string is returned unchanged.
    """
    head, tail = '[BASE64-DATA]', '[/BASE64-DATA]'
    if not (string.startswith(head) and string.endswith(tail)):
        return string
    return base64.b64decode(string[len(head):-len(tail)])
|
314
|
|
|
|
|
315
|
|
|
|
|
316
|
|
|
def decode_unicode_obj(obj):
    """
    Reverse `unicode_obj` on a dict/list/tuple structure.

    Dict keys go through ``decode_unicode_string`` and dict values are
    decoded recursively; lists and tuples are decoded element by element
    into a new list; strings go through ``decode_unicode_string``. Any
    other value is returned unchanged.
    """
    if isinstance(obj, dict):
        return {
            decode_unicode_string(key): decode_unicode_obj(value)
            for key, value in iteritems(obj)
        }
    if isinstance(obj, six.string_types):
        return decode_unicode_string(obj)
    if isinstance(obj, (list, tuple)):
        return [decode_unicode_obj(item) for item in obj]
    return obj
|
331
|
|
|
|
|
332
|
|
|
|
|
333
|
|
|
class Get(object):
    """
    Wrapper around a zero-argument callable whose value is computed on access.

    The callable is invoked on every ``__get__`` call; nothing is cached.
    """

    def __init__(self, getter):
        # ``getter`` is called with no arguments each time the value is read.
        self.getter = getter

    def __get__(self, instance, owner):
        # ``instance`` and ``owner`` are accepted but not used.
        compute = self.getter
        return compute()
|
343
|
|
|
|
|
344
|
|
|
|
|
345
|
|
|
class _ObjectDictAttributeError(AttributeError, KeyError):
    """Raised when an ``ObjectDict`` attribute lookup finds no such key.

    It derives from ``AttributeError`` so that ``hasattr``, ``getattr`` with
    a default and the ``copy`` module behave normally, and from ``KeyError``
    so that code catching the previously raised ``KeyError`` keeps working.
    """


class ObjectDict(dict):
    """
    Object like dict, every dict[key] can be visited by dict.key

    If dict[key] has a ``__get__`` method (e.g. a `Get`), the result of
    calling it is returned instead of the stored value.
    """

    def __getattr__(self, name):
        # __getattr__ only runs after normal attribute lookup has failed, so
        # real dict methods and attributes are never shadowed by keys.
        try:
            ret = self.__getitem__(name)
        except KeyError:
            raise _ObjectDictAttributeError(name)
        if hasattr(ret, '__get__'):
            return ret.__get__(self, ObjectDict)
        return ret
|
357
|
|
|
|
|
358
|
|
|
|
|
359
|
|
|
def load_object(name):
    """Import and return the object named by a dotted ``module.object`` path.

    Raises ``Exception`` when ``name`` contains no dot.
    """

    if "." not in name:
        raise Exception('load object need module.object')

    module_name, object_name = name.rsplit('.', 1)
    # Python 2 gets a bytes fromlist entry and an explicit import level of -1.
    if six.PY2:
        extra_args = ([utf8(object_name)], -1)
    else:
        extra_args = ([object_name],)
    module = __import__(module_name, globals(), locals(), *extra_args)
    return getattr(module, object_name)
|
371
|
|
|
|
|
372
|
|
|
|
|
373
|
|
|
def get_python_console(namespace=None):
    """
    Return an interactive python console instance with the caller's stack.

    When ``namespace`` is None it is built from the calling frame: a copy of
    the caller's globals updated with the caller's locals. An IPython
    ``TerminalInteractiveShell`` is returned when IPython can be imported;
    otherwise a ``code.InteractiveConsole`` is returned.
    """
    # NOTE(review): the source indentation was lost; the nesting of the
    # fallback branch below is reconstructed - confirm against the original.

    if namespace is None:
        import inspect
        frame = inspect.currentframe()
        caller = frame.f_back
        if not caller:
            logging.error("can't find caller who start this console.")
            caller = frame
        namespace = dict(caller.f_globals)
        namespace.update(caller.f_locals)

    try:
        from IPython.terminal.interactiveshell import TerminalInteractiveShell
        shell = TerminalInteractiveShell(user_ns=namespace)
    except ImportError:
        # Tab completion is optional: set it up only if readline is available.
        try:
            import readline
            import rlcompleter
            readline.set_completer(rlcompleter.Completer(namespace).complete)
            readline.parse_and_bind("tab: complete")
        except ImportError:
            pass
        import code
        shell = code.InteractiveConsole(namespace)
        # Flag checked by readfunc; set by the injected ask_exit below.
        shell._quit = False

        def exit():
            shell._quit = True

        def readfunc(prompt=""):
            # Raising EOFError once the quit flag is set stops further reads.
            if shell._quit:
                raise EOFError
            return six.moves.input(prompt)

        # inject exit method
        shell.ask_exit = exit
        shell.raw_input = readfunc

    return shell
|
416
|
|
|
|
|
417
|
|
|
|
|
418
|
|
|
def python_console(namespace=None):
    """Start an interactive python console with the caller's stack.

    When ``namespace`` is None it is built from the calling frame: a copy of
    the caller's globals updated with the caller's locals. Returns whatever
    the console's ``interact()`` returns.
    """

    if namespace is None:
        import inspect
        this_frame = inspect.currentframe()
        caller = this_frame.f_back
        if not caller:
            logging.error("can't find caller who start this console.")
            caller = this_frame
        namespace = dict(caller.f_globals)
        namespace.update(caller.f_locals)

    console = get_python_console(namespace=namespace)
    return console.interact()
|
432
|
|
|
|
|
433
|
|
|
|
|
434
|
|
|
def check_port_open(port, addr='127.0.0.1'):
    """Return True if a TCP connection to ``addr``:``port`` succeeds.

    :param port: TCP port number to probe.
    :param addr: IPv4 address or host name to connect to.
    :returns: ``True`` when ``connect_ex`` reports success (0), else ``False``.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        return sock.connect_ex((addr, port)) == 0
    finally:
        # Always release the descriptor; the probe socket is never reused.
        sock.close()
|
441
|
|
|
|