1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- encoding: utf-8 -*- |
3
|
|
|
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: |
4
|
|
|
# Author: Binux<[email protected]> |
5
|
|
|
# http://binux.me |
6
|
|
|
# Created on 2012-11-06 11:50:13 |
7
|
|
|
|
8
|
|
|
import math |
9
|
|
|
import logging |
10
|
|
|
import hashlib |
11
|
|
|
import datetime |
12
|
|
|
import socket |
13
|
|
|
import base64 |
14
|
|
|
import warnings |
15
|
|
|
import threading |
16
|
|
|
|
17
|
|
|
import six |
18
|
|
|
from six import iteritems |
19
|
|
|
|
20
|
|
|
def md5string(x):
    """Return the hexadecimal MD5 digest of ``x`` after passing it through ``utf8``."""
    return hashlib.md5(utf8(x)).hexdigest()
21
|
|
|
|
22
|
|
|
|
23
|
|
|
class ReadOnlyDict(dict):
    """A dict that rejects item assignment (``d[key] = value``).

    Only ``__setitem__`` is overridden here; all other ``dict`` methods are
    inherited unchanged.
    """

    def __setitem__(self, key, value):
        # Any attempt at ``d[key] = value`` is refused unconditionally.
        raise Exception("dict is read-only")
28
|
|
|
|
29
|
|
|
|
30
|
|
|
def getitem(obj, key=0, default=None):
    """Return ``obj[key]``, or ``default`` when the lookup fails.

    With the default ``key=0`` this yields the first element of a sequence.

    :param obj: any object; it does not need to support indexing.
    :param key: index or key to look up (defaults to ``0``).
    :param default: value returned when ``obj[key]`` raises.
    :return: ``obj[key]`` on success, otherwise ``default``.
    """
    try:
        return obj[key]
    except Exception:
        # Stay broad on purpose (IndexError, KeyError, TypeError for
        # non-subscriptable objects, errors from custom __getitem__ ...),
        # but let KeyboardInterrupt / SystemExit propagate.
        return default
36
|
|
|
|
37
|
|
|
|
38
|
|
|
def hide_me(tb, g=globals()):
    """Strip the leading frames of a traceback up to and including module ``g``.

    Walks ``tb`` past every frame whose globals are not ``g``, then past the
    following run of frames whose globals are ``g``, and returns what is left.
    The original traceback is returned when nothing is left or when walking
    the chain raises.

    ``g`` defaults to the globals of this module, captured once at definition
    time.
    """
    start = tb
    cursor = tb
    try:
        # Pass 1 skips frames outside ``g``; pass 2 skips frames inside ``g``.
        for inside_g in (False, True):
            while cursor and (cursor.tb_frame.f_globals is g) == inside_g:
                cursor = cursor.tb_next
    except Exception as err:
        logging.exception(err)
        cursor = start
    # Never hand back an empty traceback.
    return cursor or start
52
|
|
|
|
53
|
|
|
|
54
|
|
|
def run_in_thread(func, *args, **kwargs):
    """Start ``func(*args, **kwargs)`` in a daemon thread and return the Thread.

    The thread is already started when this function returns.
    """
    worker = threading.Thread(target=func, args=args, kwargs=kwargs)
    worker.daemon = True
    worker.start()
    return worker
61
|
|
|
|
62
|
|
|
|
63
|
|
|
def run_in_subprocess(func, *args, **kwargs):
    """Start ``func(*args, **kwargs)`` in a daemon subprocess and return the Process.

    The process is already started when this function returns.
    """
    import multiprocessing

    child = multiprocessing.Process(target=func, args=args, kwargs=kwargs)
    child.daemon = True
    child.start()
    return child
70
|
|
|
|
71
|
|
|
|
72
|
|
|
def format_date(date, gmt_offset=0, relative=True, shorter=False, full_format=False):
    """Formats the given date (which should be GMT).

    By default, we return a relative time (e.g., "2 minutes ago"). You
    can return an absolute date string with ``relative=False``.

    You can force a full format date ("July 10, 1980") with
    ``full_format=True``.

    This method is primarily intended for dates in the past.
    For dates in the future, we fall back to full format.

    From tornado

    :param date: a naive ``datetime`` or a numeric timestamp (int/float);
        any falsy value (``None``, ``0``, ...) yields ``'-'``.
    :param gmt_offset: offset in minutes subtracted from ``date`` to obtain
        the local time that is displayed.
    :param relative: allow relative phrases such as "5 minutes ago".
    :param shorter: drop the "at <time>" suffix.
    :param full_format: always use "<month> <day>, <year>[ at <time>]".
    :return: the formatted string.
    """
    if not date:
        return '-'
    if isinstance(date, (int, float)):
        date = datetime.datetime.utcfromtimestamp(date)
    now = datetime.datetime.utcnow()
    if date > now:
        # Compare the whole timedelta: ``timedelta.seconds`` alone ignores the
        # ``days`` component, which made e.g. "tomorrow + 30s" look like it was
        # only 30 seconds ahead.
        if relative and date - now < datetime.timedelta(seconds=60):
            # Due to clock skew, some things are slightly
            # in the future. Round timestamps in the immediate
            # future down to now in relative mode.
            date = now
        else:
            # Otherwise, future dates always use the full format.
            full_format = True
    local_date = date - datetime.timedelta(minutes=gmt_offset)
    local_now = now - datetime.timedelta(minutes=gmt_offset)
    local_yesterday = local_now - datetime.timedelta(hours=24)
    difference = now - date

    fmt = None
    if not full_format:
        finished, fmt = fix_full_format(difference.days, difference.seconds, relative,
                                        shorter, local_date, local_yesterday)
        if finished:
            # A complete relative phrase; nothing left to substitute.
            return fmt

    if fmt is None:
        # No short form applies (or full format was requested).
        fmt = "%(month_name)s %(day)s, %(year)s" if shorter else \
            "%(month_name)s %(day)s, %(year)s at %(time)s"

    str_time = "%d:%02d" % (local_date.hour, local_date.minute)

    return fmt % {
        "month_name": local_date.strftime('%b'),
        "weekday": local_date.strftime('%A'),
        "day": str(local_date.day),
        "year": str(local_date.year),
        "month": local_date.month,
        "time": str_time
    }
131
|
|
|
|
132
|
|
|
|
133
|
|
|
def fix_full_format(days, seconds, relative, shorter, local_date, local_yesterday):
    """Pick the short representation for a date that is ``days``/``seconds`` old.

    Returns a ``(done, value)`` pair:

    * ``done`` is True: ``value`` is a finished relative phrase such as
      "5 minutes ago" (only when ``relative`` is set and ``days == 0``).
    * ``done`` is False: ``value`` is a %-style template using the
      ``time``/``weekday``/``month``/``day`` keys, or None when no short form
      applies (``days >= 334``).
    """
    if relative and days == 0:
        # Under 50 seconds: report seconds.
        if seconds < 50:
            if seconds <= 1:
                return True, "1 second ago"
            return True, "%(seconds)d seconds ago" % {"seconds": seconds}

        # Under 50 minutes: report rounded minutes.
        if seconds < 50 * 60:
            minutes = round(seconds / 60.0)
            if minutes <= 1:
                return True, "1 minute ago"
            return True, "%(minutes)d minutes ago" % {"minutes": minutes}

        # Anything else within the day: report rounded hours.
        hours = round(seconds / (60.0 * 60))
        if hours <= 1:
            return True, "1 hour ago"
        return True, "%(hours)d hours ago" % {"hours": hours}

    if days == 0:
        return False, "%(time)s"

    if days == 1 and local_date.day == local_yesterday.day and relative:
        if shorter:
            return False, "yesterday"
        return False, "yesterday at %(time)s"

    if days < 5:
        if shorter:
            return False, "%(weekday)s"
        return False, "%(weekday)s at %(time)s"

    if days < 334:  # 11mo, since confusing for same month last year
        if shorter:
            return False, "%(month)s-%(day)s"
        return False, "%(month)s-%(day)s at %(time)s"

    return False, None
159
|
|
|
|
160
|
|
|
class TimeoutError(Exception):
    """Raised by ``timeout.handle_timeout`` when the alarm signal fires."""
162
|
|
|
|
163
|
|
|
try:
    import signal
    if not hasattr(signal, 'SIGALRM'):
        # No alarm signal on this platform: use the no-op fallback below.
        raise ImportError('signal')

    class timeout:
        """
        Context manager limiting how long its body may run (SIGALRM based).

        with timeout(3):
            time.sleep(10)

        ``seconds`` is rounded up to a whole number of seconds; a falsy value
        disables the limit.
        """

        def __init__(self, seconds=1, error_message='Timeout'):
            self.seconds = seconds
            self.error_message = error_message

        def handle_timeout(self, signum, frame):
            # Signal handler: turn the alarm into an exception.
            raise TimeoutError(self.error_message)

        def __enter__(self):
            on_main_thread = isinstance(threading.current_thread(), threading._MainThread)
            if not on_main_thread:
                logging.warning("timeout only works on main thread, are you running pyspider in threads?")
                # Disable the limit for this instance (also skips __exit__'s reset).
                self.seconds = 0
            if not self.seconds:
                return
            signal.signal(signal.SIGALRM, self.handle_timeout)
            signal.alarm(int(math.ceil(self.seconds)))

        def __exit__(self, type, value, traceback):
            if not self.seconds:
                return
            # Cancel the pending alarm.
            signal.alarm(0)

except ImportError as e:
    warnings.warn("timeout is not supported on your platform.", FutureWarning)

    class timeout:
        """
        Time limit of command (for windows)

        No-op stand-in: accepts the same arguments but enforces nothing.
        """

        def __init__(self, seconds=1, error_message='Timeout'):
            """Accept and ignore the arguments."""

        def __enter__(self):
            """Do nothing."""

        def __exit__(self, type, value, traceback):
            """Do nothing."""
211
|
|
|
|
212
|
|
|
|
213
|
|
|
def utf8(string):
    """
    Return ``string`` as utf8 encoded bytes.

    Bytes are returned untouched and text is encoded. Any other object is
    first converted with ``six.text_type`` and then encoded.
    """
    if isinstance(string, six.binary_type):
        return string
    if not isinstance(string, six.text_type):
        string = six.text_type(string)
    return string.encode('utf8')
225
|
|
|
|
226
|
|
|
|
227
|
|
|
def text(string, encoding='utf8'):
    """
    Return ``string`` as a unicode (text) object.

    Bytes are decoded with ``encoding`` and text is returned untouched. Any
    other object is converted with ``six.text_type``.
    """
    if isinstance(string, six.binary_type):
        return string.decode(encoding)
    if isinstance(string, six.text_type):
        return string
    return six.text_type(string)
239
|
|
|
|
240
|
|
|
|
241
|
|
|
def pretty_unicode(string):
    """
    Return ``string`` as unicode: decoded as utf8 when possible, otherwise as
    a unicode-escaped rendering of its Latin-1 decoding.
    """
    if isinstance(string, six.text_type):
        return string
    try:
        result = string.decode("utf8")
    except UnicodeDecodeError:
        # Not valid utf8: show the raw bytes as escape sequences instead.
        escaped = string.decode('Latin-1').encode('unicode_escape')
        result = escaped.decode("utf8")
    return result
251
|
|
|
|
252
|
|
|
|
253
|
|
|
def unicode_string(string):
    """
    Make sure string is unicode: decode it as utf8, or wrap it as base64 if
    that fails.

    Text input is returned unchanged. Bytes that are not valid utf8 are
    returned as ``'[BASE64-DATA]<base64>[/BASE64-DATA]'``, which
    `decode_unicode_string` can turn back into the original bytes.
    """
    if isinstance(string, six.text_type):
        return string
    try:
        return string.decode("utf8")
    except UnicodeDecodeError:
        # b64encode returns bytes; decode it so the concatenation with the
        # text markers works on Python 3 instead of raising TypeError.
        encoded = base64.b64encode(string).decode('ascii')
        return '[BASE64-DATA]' + encoded + '[/BASE64-DATA]'
265
|
|
|
|
266
|
|
|
|
267
|
|
|
def unicode_dict(_dict):
    """
    Return a new dict with every key and value converted by ``unicode_obj``.
    """
    return dict(
        (unicode_obj(key), unicode_obj(value))
        for key, value in iteritems(_dict)
    )
275
|
|
|
|
276
|
|
|
|
277
|
|
|
def unicode_list(_list):
    """
    Return a new list with every element of ``_list`` converted by ``unicode_obj``.
    """
    converted = []
    for item in _list:
        converted.append(unicode_obj(item))
    return converted
282
|
|
|
|
283
|
|
|
|
284
|
|
|
def unicode_obj(obj):
    """
    Recursively convert ``obj`` so that its strings are unicode.

    * dict -> ``unicode_dict`` (keys and values converted)
    * list / tuple -> ``unicode_list`` (always returns a list)
    * string -> ``unicode_string``
    * int, float and None are returned unchanged
    * anything else -> ``text(obj)``, falling back to ``text(repr(obj))``

    Can be decoded by `decode_unicode_obj`
    """
    if isinstance(obj, dict):
        return unicode_dict(obj)
    if isinstance(obj, (list, tuple)):
        return unicode_list(obj)
    if isinstance(obj, six.string_types):
        return unicode_string(obj)
    if obj is None or isinstance(obj, (int, float)):
        return obj
    try:
        return text(obj)
    except Exception:
        # Conversion failed; fall back to the repr. KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        return text(repr(obj))
305
|
|
|
|
306
|
|
|
|
307
|
|
|
def decode_unicode_string(string):
    """
    Reverse `unicode_string`: unwrap and base64-decode a
    ``[BASE64-DATA]...[/BASE64-DATA]`` value; return anything else unchanged.
    """
    opening, closing = '[BASE64-DATA]', '[/BASE64-DATA]'
    if not (string.startswith(opening) and string.endswith(closing)):
        return string
    payload = string[len(opening):-len(closing)]
    return base64.b64decode(payload)
314
|
|
|
|
315
|
|
|
|
316
|
|
|
def decode_unicode_obj(obj):
    """
    Reverse `unicode_obj` on a dict/list/tuple/string structure.

    Dict keys and all strings go through ``decode_unicode_string``; lists and
    tuples are decoded element-wise into a list; other values pass through.
    """
    if isinstance(obj, six.string_types):
        return decode_unicode_string(obj)
    if isinstance(obj, dict):
        return dict(
            (decode_unicode_string(key), decode_unicode_obj(value))
            for key, value in iteritems(obj)
        )
    if isinstance(obj, (list, tuple)):
        return list(map(decode_unicode_obj, obj))
    return obj
331
|
|
|
|
332
|
|
|
|
333
|
|
|
class Get(object):
    """
    Descriptor that calls ``getter()`` on every access and returns the result.

    The instance and owner passed to ``__get__`` are ignored.
    """

    def __init__(self, getter):
        self.getter = getter

    def __get__(self, instance, owner):
        compute = self.getter
        return compute()
343
|
|
|
|
344
|
|
|
|
345
|
|
|
class ObjectDict(dict):
    """
    Dict whose items can also be read as attributes (``d.key`` for ``d[key]``).

    If the stored value has a ``__get__`` method (e.g. a `Get`), the result of
    ``value.__get__(self, ObjectDict)`` is returned instead of the value.
    A missing key raises ``KeyError``.
    """

    def __getattr__(self, name):
        value = self.__getitem__(name)
        if not hasattr(value, '__get__'):
            return value
        # Descriptor-like value: resolve it against this dict.
        return value.__get__(self, ObjectDict)
357
|
|
|
|
358
|
|
|
|
359
|
|
|
def load_object(name):
    """Import and return the object named by the dotted path ``module.object``.

    Raises ``Exception`` when ``name`` contains no dot.
    """
    if "." not in name:
        raise Exception('load object need module.object')

    module_name, object_name = name.rsplit('.', 1)
    if six.PY2:
        # Python 2 gets a bytes fromlist entry and level -1.
        extra_args = ([utf8(object_name)], -1)
    else:
        extra_args = ([object_name],)
    module = __import__(module_name, globals(), locals(), *extra_args)
    return getattr(module, object_name)
371
|
|
|
|
372
|
|
|
|
373
|
|
|
def get_python_console(namespace=None):
    """
    Build an interactive console object without starting it.

    When ``namespace`` is None the caller's globals and locals are used, with
    locals taking precedence. IPython's ``TerminalInteractiveShell`` is used
    when importable; otherwise a ``code.InteractiveConsole`` is returned, with
    readline tab-completion when available.
    """
    if namespace is None:
        import inspect
        this_frame = inspect.currentframe()
        origin = this_frame.f_back
        if not origin:
            logging.error("can't find caller who start this console.")
            origin = this_frame
        namespace = dict(origin.f_globals)
        namespace.update(origin.f_locals)

    try:
        from IPython.terminal.interactiveshell import TerminalInteractiveShell
        console = TerminalInteractiveShell(user_ns=namespace)
    except ImportError:
        # NOTE(review): SOURCE lost its indentation; everything down to the
        # ``raw_input`` injection is assumed to belong to this fallback branch.
        try:
            import readline
            import rlcompleter
            completer = rlcompleter.Completer(namespace)
            readline.set_completer(completer.complete)
            readline.parse_and_bind("tab: complete")
        except ImportError:
            pass
        import code
        console = code.InteractiveConsole(namespace)
        console._quit = False

        def ask_exit():
            console._quit = True

        def read_line(prompt=""):
            # Once exit was requested, signal end-of-input to stop the console.
            if console._quit:
                raise EOFError
            return six.moves.input(prompt)

        # inject exit method
        console.ask_exit = ask_exit
        console.raw_input = read_line

    return console
416
|
|
|
|
417
|
|
|
|
418
|
|
|
def python_console(namespace=None):
    """Run an interactive console and return the result of its ``interact()``.

    When ``namespace`` is None the caller's globals and locals are used, with
    locals taking precedence.
    """
    if namespace is None:
        import inspect
        this_frame = inspect.currentframe()
        origin = this_frame.f_back
        if not origin:
            logging.error("can't find caller who start this console.")
            origin = this_frame
        namespace = dict(origin.f_globals)
        namespace.update(origin.f_locals)

    console = get_python_console(namespace=namespace)
    return console.interact()
432
|
|
|
|
433
|
|
|
|
434
|
|
|
def check_port_open(port, addr='127.0.0.1'):
    """Return True when a TCP connection to ``(addr, port)`` succeeds.

    :param port: TCP port number to probe.
    :param addr: IPv4 address or host name, defaults to localhost.
    :return: True if ``connect_ex`` reports success (0), otherwise False.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        return sock.connect_ex((addr, port)) == 0
    finally:
        # Always release the descriptor; the probe socket is not reused.
        sock.close()
441
|
|
|
|