Passed
Push — master ( d2da6a...2770ac )
by Fabio
01:31
created

benedict.utils.io_util.read_file()   A

Complexity

Conditions 1

Size

Total Lines 5
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 5
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
# -*- coding: utf-8 -*-
2
3
from six import binary_type, string_types, StringIO
4
from slugify import slugify
5
6
import base64
7
import csv
8
import errno
9
import json
10
import os
11
import re
12
import requests
13
import xmltodict
14
import toml
15
import yaml
16
17
try:
18
    # python 3
19
    from urllib.parse import unquote
20
    from urllib.parse import unquote_plus
21
    from urllib.parse import urlencode
22
    from urllib.parse import parse_qs
23
except ImportError:
24
    # python 2
25
    from urllib import unquote
26
    from urllib import unquote_plus
27
    from urllib import urlencode
28
    from urlparse import parse_qs
29
30
31
def decode(s, format, **kwargs):
    """Decode the string ``s`` using the decoder registered for ``format``.

    Extra keyword arguments are forwarded to the underlying decoder.
    Raises ``ValueError`` when no decoder is registered for ``format``.
    """
    decoder = _get_format_decoder(format)
    if not decoder:
        raise ValueError('Invalid format: {}.'.format(format))
    options = kwargs.copy()
    return decoder(s.strip(), **options)
39
40
41
def decode_base64(s, **kwargs):
    """Decode a base64 string, optionally parsing the result as a sub-format.

    URL-encoded characters are unquoted and missing ``=`` padding is restored
    before decoding. When ``subformat`` is given, the decoded text is further
    parsed with that format's decoder; ``encoding`` drives the bytes->str step
    (it defaults to 'utf-8' whenever a subformat is requested, else ``None``).
    """
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    # undo url-encoding, then restore '=' padding stripped by url-safe callers
    s = unquote(s)
    remainder = len(s) % 4
    if remainder:
        s += '=' * (4 - remainder)
    data = base64.b64decode(s)
    if encoding:
        data = data.decode(encoding)
        if subformat:
            subdecoder = _get_format_decoder(subformat)
            if subdecoder:
                data = subdecoder(data, **kwargs)
    return data
58
59
60
def decode_csv(s, **kwargs):
    """Parse a CSV string into a list of dicts, one per data row.

    ``columns`` may supply the dict keys explicitly; otherwise, when
    ``columns_row`` is true (the default), they are taken from the first row.
    ``quote=True`` maps to ``quoting=csv.QUOTE_ALL``; all remaining keyword
    arguments are passed through to ``csv.reader``.
    """
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    columns = kwargs.pop('columns', None)
    columns_row = kwargs.pop('columns_row', True)
    reader = csv.reader(StringIO(s), **kwargs)
    items = []
    for index, row in enumerate(reader):
        if index == 0 and columns_row:
            # the first row carries the header unless columns were given
            if not columns:
                columns = row
            continue
        # NOTE(review): assumes columns is set by here (header row or
        # explicit kwarg); zip(None, row) would raise otherwise — confirm
        items.append(dict(zip(columns, row)))
    return items
80
81
82
def decode_json(s, **kwargs):
    """Deserialize a JSON string; kwargs are forwarded to ``json.loads``."""
    return json.loads(s, **kwargs)
85
86
87
def decode_query_string(s, **kwargs):
    """Parse a URL query string into a dict.

    With ``flat=True`` (the default) each key maps to its first value rather
    than to the list produced by ``parse_qs``. Raises ``ValueError`` when
    ``s`` does not look like a query string.
    """
    flat = kwargs.pop('flat', True)
    qs_pattern = re.compile(r'^(([\w\-\%\+]+\=[\w\-\%\+]*)+([\&]{1})?)+')
    if not qs_pattern.match(s):
        raise ValueError('Invalid query string: {}'.format(s))
    data = parse_qs(s)
    if flat:
        data = dict((key, values[0]) for key, values in data.items())
    return data
98
99
100
def decode_xml(s, **kwargs):
    """Parse an XML string with ``xmltodict``.

    Defaults to plain ``dict`` nodes (instead of ``OrderedDict``) unless the
    caller overrides ``dict_constructor``.
    """
    kwargs.setdefault('dict_constructor', dict)
    return xmltodict.parse(s, **kwargs)
104
105
106
def decode_toml(s, **kwargs):
    """Deserialize a TOML string; kwargs are forwarded to ``toml.loads``."""
    return toml.loads(s, **kwargs)
109
110
111
def decode_yaml(s, **kwargs):
    """Deserialize a YAML string; kwargs are forwarded to ``yaml.load``.

    NOTE(review): the default ``yaml.Loader`` can construct arbitrary python
    objects — do not feed it untrusted input (consider ``SafeLoader``).
    """
    kwargs.setdefault('Loader', yaml.Loader)
    return yaml.load(s, **kwargs)
115
116
117
def encode(d, format, **kwargs):
    """Encode ``d`` using the encoder registered for ``format``.

    Extra keyword arguments are forwarded to the underlying encoder.
    Raises ``ValueError`` when no encoder is registered for ``format``.
    """
    encoder = _get_format_encoder(format)
    if not encoder:
        raise ValueError('Invalid format: {}.'.format(format))
    return encoder(d, **kwargs)
124
125
126
def encode_base64(d, **kwargs):
    """Encode ``d`` to a base64 string.

    Non-string input is first serialized with ``subformat``'s encoder (when
    one is given); ``encoding`` (defaulting to 'utf-8' whenever a subformat
    is requested) drives the str<->bytes conversions around ``b64encode``.
    """
    payload = d
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    # serialize non-string payloads with the requested sub-format first
    if subformat and not isinstance(payload, string_types):
        subencoder = _get_format_encoder(subformat)
        if subencoder:
            payload = subencoder(payload, **kwargs)
    if encoding and isinstance(payload, string_types):
        payload = payload.encode(encoding)
    payload = base64.b64encode(payload)
    if encoding and isinstance(payload, binary_type):
        payload = payload.decode(encoding)
    return payload
140
141
142
def encode_csv(l, **kwargs):
    """Serialize a list of dicts (or of row sequences) to a CSV string.

    Dict items are written using ``columns`` — defaulting to the sorted keys
    of the first item — with a header row when ``columns_row`` is true.
    ``quote=True`` maps to ``quoting=csv.QUOTE_ALL``; remaining keyword
    arguments go straight to ``csv.writer``.
    """
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    kwargs.setdefault('lineterminator', '\n')
    columns = kwargs.pop('columns', None)
    columns_row = kwargs.pop('columns_row', True)
    if not columns and len(l) and isinstance(l[0], dict):
        # derive a stable header from the first item's keys
        columns = sorted([str(key) for key in l[0].keys()])
    buffer = StringIO()
    writer = csv.writer(buffer, **kwargs)
    if columns_row and columns:
        writer.writerow(columns)
    for item in l:
        if isinstance(item, dict):
            writer.writerow([item.get(key, '') for key in columns])
        elif isinstance(item, (list, tuple, set, )):
            writer.writerow(item)
        else:
            writer.writerow([item])
    return buffer.getvalue()
166
167
168
def encode_json(d, **kwargs):
    """Serialize ``d`` to a JSON string; kwargs forwarded to ``json.dumps``."""
    return json.dumps(d, **kwargs)
171
172
173
def encode_query_string(d, **kwargs):
    """Serialize a dict to a URL query string via ``urlencode``."""
    return urlencode(d, **kwargs)
176
177
178
def encode_toml(d, **kwargs):
    """Serialize ``d`` to a TOML string; kwargs forwarded to ``toml.dumps``."""
    return toml.dumps(d, **kwargs)
181
182
183
def encode_xml(d, **kwargs):
    """Serialize ``d`` to XML; kwargs forwarded to ``xmltodict.unparse``."""
    return xmltodict.unparse(d, **kwargs)
186
187
188
def encode_yaml(d, **kwargs):
    """Serialize ``d`` to a YAML string; kwargs forwarded to ``yaml.dump``."""
    return yaml.dump(d, **kwargs)
191
192
193
def read_content(s):
    """Resolve ``s`` — raw data, a url, or a filepath — to its content.

    Multi-line strings are returned verbatim; http(s) urls are fetched via
    ``read_url``; a single-line string ending with a known format extension
    is read from disk (``None`` when the file does not exist); anything else
    is returned as-is.
    """
    if len(s.splitlines()) > 1:
        # multi-line input can only be raw data
        return s
    if s.startswith(('http://', 'https://', )):
        # url
        return read_url(s)
    if any([s.endswith(extension) for extension in _get_formats_extensions()]):
        # filepath
        return read_file(s) if os.path.isfile(s) else None
    # plain data
    return s
211
212
213
def read_file(filepath):
    """Read and return the entire text content of the file at ``filepath``."""
    # context manager guarantees the handle is closed even if read() raises
    # (the previous open/read/close sequence leaked the handle on error)
    with open(filepath, 'r') as handler:
        return handler.read()
218
219
220
def read_url(url, *args, **kwargs):
    """Fetch ``url`` and return the response body text.

    Extra arguments are forwarded to ``requests.get``; raises ``ValueError``
    for any non-OK status code.
    """
    response = requests.get(url, *args, **kwargs)
    if response.status_code != requests.codes.ok:
        raise ValueError(
            'Invalid url response status code: {}.'.format(
                response.status_code))
    return response.text
229
230
231
def write_file(filepath, content):
    """Write ``content`` to ``filepath``, creating parent dirs as needed.

    Returns ``True`` on success.
    """
    # https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
    filedir = os.path.dirname(filepath)
    # filedir is '' for a bare filename — os.makedirs('') would raise,
    # so only create directories when there is actually a path component
    if filedir and not os.path.exists(filedir):
        try:
            os.makedirs(filedir)
        except OSError as e:
            # guard against race condition: another writer may have
            # created the directory between the exists() check and here
            if e.errno != errno.EEXIST:
                raise e
    # context manager guarantees the handle is closed even if write() raises
    with open(filepath, 'w+') as handler:
        handler.write(content)
    return True
245
246
247
# registry mapping each supported format name (and its aliases) to its
# decoder/encoder pair; consulted by the _get_format* helpers
_formats = {
    'b64': {'decoder': decode_base64, 'encoder': encode_base64},
    'base64': {'decoder': decode_base64, 'encoder': encode_base64},
    'csv': {'decoder': decode_csv, 'encoder': encode_csv},
    'json': {'decoder': decode_json, 'encoder': encode_json},
    'qs': {'decoder': decode_query_string, 'encoder': encode_query_string},
    'query_string': {'decoder': decode_query_string, 'encoder': encode_query_string},
    'toml': {'decoder': decode_toml, 'encoder': encode_toml},
    'yaml': {'decoder': decode_yaml, 'encoder': encode_yaml},
    'yml': {'decoder': decode_yaml, 'encoder': encode_yaml},
    'xml': {'decoder': decode_xml, 'encoder': encode_xml},
}
289
290
# dotted file extensions ('.json', '.yml', ...) recognized by read_content()
_formats_extensions = ['.' + extension for extension in _formats]
292
293
294
def _get_format(format):
    """Return the registry entry for ``format``, or {} when unknown.

    The name is slugified ('query-string' -> 'query_string') so minor
    spelling variants still resolve to the same entry.
    """
    key = slugify(format, separator='_')
    return _formats.get(key, {})
297
298
299
def _get_format_decoder(format):
    """Return the decoder callable registered for ``format``, else None."""
    entry = _get_format(format)
    return entry.get('decoder', None)
301
302
303
def _get_format_encoder(format):
    """Return the encoder callable registered for ``format``, else None."""
    entry = _get_format(format)
    return entry.get('encoder', None)
305
306
307
def _get_formats_extensions():
    """Return the list of recognized dotted file extensions."""
    return _formats_extensions
309
310