# -*- coding: utf-8 -*-

from six import binary_type, string_types, StringIO
from slugify import slugify

import base64
import csv
import errno
import json
import os
import re
import requests
import xmltodict
import toml
import yaml

try:
    # python 3
    from urllib.parse import unquote
    from urllib.parse import unquote_plus
    from urllib.parse import urlencode
    from urllib.parse import parse_qs
except ImportError:
    # python 2
    from urllib import unquote
    from urllib import unquote_plus
    from urllib import urlencode
    from urlparse import parse_qs


def decode(s, format, **kwargs):
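    """
    Decode the given string with the decoder registered for the given
    format in the _formats registry below; a ValueError is raised when
    the format is unknown.
    Illustrative example (assumed input):
        decode('{"a": 1}', 'json')  # -> {'a': 1}
    """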
    decode_func = _get_format_decoder(format)
    if decode_func:
        decode_opts = kwargs.copy()
        data = decode_func(s.strip(), **decode_opts)
        return data
    else:
        raise ValueError('Invalid format: {}.'.format(format))


def decode_base64(s, **kwargs):
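    """
    Decode a base64-encoded string; url-encoded characters and missing
    padding are fixed before decoding. If a 'subformat' kwarg is given
    (e.g. 'json'), the decoded text is parsed again with that decoder.
    Illustrative example (assumed input):
        decode_base64('eyJhIjogMX0=', subformat='json')  # -> {'a': 1}
    """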
    # fix urlencoded chars
    s = unquote(s)
    # fix padding
    m = len(s) % 4
    if m != 0:
        s += '=' * (4 - m)
    data = base64.b64decode(s)
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    if encoding:
        data = data.decode(encoding)
        if subformat:
            decode_func = _get_format_decoder(subformat)
            if decode_func:
                data = decode_func(data, **kwargs)
    return data


def decode_csv(s, **kwargs):
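    r"""
    Parse a CSV string into a list of dicts (one per row). By default the
    first row provides the column names; custom names can be passed via
    the 'columns' kwarg, and 'columns_row=False' disables header handling.
    Illustrative example (assumed input):
        decode_csv('a,b\n1,2')  # -> [{'a': '1', 'b': '2'}]
    """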
    # kwargs.setdefault('delimiter', ',')
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    columns = kwargs.pop('columns', None)
    columns_row = kwargs.pop('columns_row', True)
    f = StringIO(s)
    r = csv.reader(f, **kwargs)
    ln = 0
    data = []
    for row in r:
        if ln == 0 and columns_row:
            if not columns:
                columns = row
            ln += 1
            continue
        d = dict(zip(columns, row))
        data.append(d)
        ln += 1
    return data


def decode_json(s, **kwargs):
    data = json.loads(s, **kwargs)
    return data


def decode_query_string(s, **kwargs):
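    """
    Parse a url query string into a dict. By default ('flat=True') each
    value is a single string; with 'flat=False' values are lists, as
    returned by parse_qs.
    Illustrative example (assumed input):
        decode_query_string('a=1&b=2')  # -> {'a': '1', 'b': '2'}
    """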
    flat = kwargs.pop('flat', True)
    qs_re = r'^(([\w\-\%\+]+\=[\w\-\%\+]*)+([\&]{1})?)+'
    qs_pattern = re.compile(qs_re)
    if qs_pattern.match(s):
        data = parse_qs(s)
        if flat:
            data = {key: value[0] for key, value in data.items()}
        return data
    else:
        raise ValueError('Invalid query string: {}'.format(s))


def decode_xml(s, **kwargs):
    kwargs.setdefault('dict_constructor', dict)
    data = xmltodict.parse(s, **kwargs)
    return data


def decode_toml(s, **kwargs):
    data = toml.loads(s, **kwargs)
    return data


def decode_yaml(s, **kwargs):
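    """
    Parse a YAML string. yaml.Loader is used by default; a different
    loader (e.g. yaml.SafeLoader for untrusted input) can be passed via
    the 'Loader' kwarg.
    """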
    kwargs.setdefault('Loader', yaml.Loader)
    data = yaml.load(s, **kwargs)
    return data


def encode(d, format, **kwargs):
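    """
    Encode the given data with the encoder registered for the given
    format in the _formats registry below; a ValueError is raised when
    the format is unknown.
    Illustrative example (assumed input):
        encode({'a': 1}, 'json')  # -> '{"a": 1}'
    """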
    encode_func = _get_format_encoder(format)
    if encode_func:
        s = encode_func(d, **kwargs)
        return s
    else:
        raise ValueError('Invalid format: {}.'.format(format))


def encode_base64(d, **kwargs):
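    """
    Base64-encode the given data. If a 'subformat' kwarg is given
    (e.g. 'json'), non-string input is first serialized with that
    encoder, then base64-encoded and returned as a string.
    Illustrative example (assumed input):
        encode_base64({'a': 1}, subformat='json')  # -> 'eyJhIjogMX0='
    """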
    data = d
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    if not isinstance(data, string_types) and subformat:
        encode_func = _get_format_encoder(subformat)
        if encode_func:
            data = encode_func(data, **kwargs)
    if isinstance(data, string_types) and encoding:
        data = data.encode(encoding)
    data = base64.b64encode(data)
    if isinstance(data, binary_type) and encoding:
        data = data.decode(encoding)
    return data


def encode_csv(l, **kwargs):
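    r"""
    Encode a list of dicts (or of lists/tuples) as a CSV string. Column
    names come from the 'columns' kwarg or, for dict items, from the
    sorted keys of the first item; a header row is written unless
    'columns_row=False' is passed.
    Illustrative example (assumed input):
        encode_csv([{'a': 1, 'b': 2}])  # -> 'a,b\n1,2\n'
    """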
    # kwargs.setdefault('delimiter', ',')
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    kwargs.setdefault('lineterminator', '\n')
    columns = kwargs.pop('columns', None)
    columns_row = kwargs.pop('columns_row', True)
    if not columns and len(l) and isinstance(l[0], dict):
        keys = [str(key) for key in l[0].keys()]
        columns = list(sorted(keys))
    f = StringIO()
    w = csv.writer(f, **kwargs)
    if columns_row and columns:
        w.writerow(columns)
    for item in l:
        if isinstance(item, dict):
            row = [item.get(key, '') for key in columns]
        elif isinstance(item, (list, tuple, set, )):
            row = item
        else:
            row = [item]
        w.writerow(row)
    data = f.getvalue()
    return data


def encode_json(d, **kwargs):
    data = json.dumps(d, **kwargs)
    return data


def encode_query_string(d, **kwargs):
    data = urlencode(d, **kwargs)
    return data


def encode_toml(d, **kwargs):
    data = toml.dumps(d, **kwargs)
    return data


def encode_xml(d, **kwargs):
    data = xmltodict.unparse(d, **kwargs)
    return data


def encode_yaml(d, **kwargs):
    data = yaml.dump(d, **kwargs)
    return data


def read_content(s):
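    """
    Resolve the given string to content: multi-line input is returned
    as-is, 'http(s)://...' urls are fetched with read_url, a path ending
    with a known format extension is read from disk (or None if the file
    does not exist), and anything else is returned unchanged.
    """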
    # s -> filepath or url or data
    num_lines = len(s.splitlines())
    if num_lines > 1:
        # data
        return s
    if any([s.startswith(protocol) for protocol in ['http://', 'https://']]):
        # url
        return read_url(s)
    elif any([s.endswith(extension) for extension in _get_formats_extensions()]):
        # filepath
        if os.path.isfile(s):
            return read_file(s)
        else:
            return None
    else:
        # data
        return s


def read_file(filepath):
    handler = open(filepath, 'r')
    content = handler.read()
    handler.close()
    return content


def read_url(url, *args, **kwargs):
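    """
    Fetch the given url with requests.get and return the response text;
    a ValueError is raised for any non-ok status code.
    """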
    response = requests.get(url, *args, **kwargs)
    if response.status_code == requests.codes.ok:
        content = response.text
        return content
    else:
        raise ValueError(
            'Invalid url response status code: {}.'.format(
                response.status_code))


def write_file(filepath, content):
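    """
    Write content to filepath, creating missing parent directories first
    (guarded against the race where another process creates them).
    """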
    # https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
    filedir = os.path.dirname(filepath)
    # filedir is empty for bare filenames; only create directories when needed
    if filedir and not os.path.exists(filedir):
        try:
            os.makedirs(filedir)
        except OSError as e:
            # Guard against race condition
            if e.errno != errno.EEXIST:
                raise e
    handler = open(filepath, 'w+')
    handler.write(content)
    handler.close()
    return True
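

# registry mapping each supported format name to its decoder/encoder pair
# (used by decode() and encode() above); _get_format() slugifies the
# requested name, so e.g. 'query-string' resolves to 'query_string'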
_formats = {
    'b64': {
        'decoder': decode_base64,
        'encoder': encode_base64,
    },
    'base64': {
        'decoder': decode_base64,
        'encoder': encode_base64,
    },
    'csv': {
        'decoder': decode_csv,
        'encoder': encode_csv,
    },
    'json': {
        'decoder': decode_json,
        'encoder': encode_json,
    },
    'qs': {
        'decoder': decode_query_string,
        'encoder': encode_query_string,
    },
    'query_string': {
        'decoder': decode_query_string,
        'encoder': encode_query_string,
    },
    'toml': {
        'decoder': decode_toml,
        'encoder': encode_toml,
    },
    'yaml': {
        'decoder': decode_yaml,
        'encoder': encode_yaml,
    },
    'yml': {
        'decoder': decode_yaml,
        'encoder': encode_yaml,
    },
    'xml': {
        'decoder': decode_xml,
        'encoder': encode_xml,
    },
}
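
# extensions recognized by read_content() when deciding whether a
# single-line string is a file path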
_formats_extensions = [
    '.{}'.format(extension) for extension in _formats.keys()]


def _get_format(format):
    return _formats.get(
        slugify(format, separator='_'), {})


def _get_format_decoder(format):
    return _get_format(format).get('decoder', None)


def _get_format_encoder(format):
    return _get_format(format).get('encoder', None)


def _get_formats_extensions():
    return _formats_extensions