Completed
Push — master ( 546b1e...359945 )
by Kolen
01:09
created

convert2table()   C

Complexity

Conditions 9

Size

Total Lines 54

Duplication

Lines 0
Ratio 0 %

Importance

Changes 14
Bugs 0 Features 0
Metric Value
cc 9
c 14
b 0
f 0
dl 0
loc 54
rs 5.4234

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
#!/usr/bin/env python3
2
3
r"""
4
Panflute filter to parse table in fenced YAML code blocks.
5
Currently only CSV table is supported.
6
7
7 metadata keys are recognized:
8
9
-   caption: the caption of the table. If omitted, no caption will be inserted.
10
-   alignment: a string of characters among L,R,C,D, case-insensitive,
11
        corresponds to Left-aligned, Right-aligned,
12
        Center-aligned, Default-aligned respectively.
13
    e.g. LCRD for a table with 4 columns
14
    default: DDD...
15
-   width: a list of relative widths corresponding to the width of each column.
16
    default: auto calculate from the length of each line in table cells.
17
-   table-width: the relative width of the table (e.g. relative to \linewidth).
18
    default: 1.0
19
-   header: If it has a header row. default: true
20
-   markdown: If CSV table cell contains markdown syntax. default: False
21
-   include: the path to a CSV file.
22
    If non-empty, override the CSV in the CodeBlock.
23
    default: None
24
25
When a metadata key is invalid, its default will be used instead.
26
Note that width and table-width accept fractions as well.
27
28
e.g.
29
30
```table
31
---
32
caption: '*Awesome* **Markdown** Table'
33
alignment: RC
34
table-width: 2/3
35
markdown: True
36
---
37
First row,defaulted to be header row,can be disabled
38
1,cell can contain **markdown**,"It can be arbitrary block element:
39
40
- following standard markdown syntax
41
- like this"
42
2,"Any markdown syntax, e.g.",$$E = mc^2$$
43
```
44
"""
45
46
import csv
47
import fractions
48
import io
49
import panflute
50
51
52
# begin helper functions
53
def to_bool(to_be_bool, default=True):
    """
    Coerce a YAML option value into a boolean.

    - a real boolean is returned unchanged;
    - the strings "false"/"no" give `False` and "true"/"yes" give `True`
      (case-insensitive);
    - anything else (other strings, numbers, None, lists, ...) is invalid:
      a debug message is emitted and `default` is returned.
    """
    if isinstance(to_be_bool, bool):
        return to_be_bool
    if isinstance(to_be_bool, str):
        lowered_bool = to_be_bool.lower()
        if lowered_bool in ("false", "no"):
            return False
        if lowered_bool in ("true", "yes"):
            return True
    # every remaining case is an invalid value: report it and fall back
    panflute.debug(
        "pantable: invalid boolean. "
        "Should be true/false/yes/no, case-insensitive. Default is used.")
    return default
73
74
75
def get_width(options, number_of_columns):
    """
    Read the `width` option as a list of floats.

    Each entry may be a number or a fraction string such as "1/3".
    Returns `None` when

    1. not given (silently)
    2. not a list
    3. length not equal to the number of columns
    4. an entry cannot be converted to a number (including "1/0" or inf)
    5. negative entries

    Cases 2-5 additionally emit a debug message.
    """
    try:
        raw_width = options['width']
    except KeyError:
        # width not given: caller falls back to auto-calculated width
        return None

    width = None
    # a string has a length too, so the type must be checked explicitly
    if (isinstance(raw_width, (list, tuple))
            and len(raw_width) == number_of_columns):
        try:
            candidate = [float(fractions.Fraction(x)) for x in raw_width]
        except (ValueError, TypeError, ArithmeticError):
            # ArithmeticError covers ZeroDivisionError ("1/0") and
            # OverflowError (infinite floats) raised by Fraction
            candidate = None
        if candidate is not None and all(i >= 0 for i in candidate):
            width = candidate

    if width is None:
        panflute.debug("pantable: invalid width")
    return width
99
100
101
def get_table_width(options):
    """
    Read the `table-width` option as a positive float.

    The value may be a number or a fraction string such as "2/3".
    When the key is missing, `1.0` is returned. When the value cannot be
    converted (wrong type, malformed string, "1/0", infinity) or is not
    strictly positive, a debug message is emitted and `1.0` is returned.
    """
    try:
        table_width = float(fractions.Fraction(
            options.get('table-width', 1.0)))
    except (ValueError, TypeError, ArithmeticError):
        # ArithmeticError covers ZeroDivisionError ("1/0") and
        # OverflowError (infinite floats) raised by Fraction
        table_width = None
    # explicit check rather than `assert`, which is removed under -O
    if table_width is None or not table_width > 0:
        panflute.debug("pantable: invalid table-width")
        return 1.0
    return table_width
113
# end helper functions
114
115
116
def auto_width(table_width, number_of_columns, table_list):
    """
    Calculate relative column widths from the cell contents.

    The absolute width of a column is 3 plus the length of the longest
    line found in any cell of that column; the relative widths are the
    absolute widths normalized to sum to `table_width`.

    `table_list` is expected to be regular: every row has at least
    `number_of_columns` string cells.

    Returns `None` (after a debug message) when the table is empty, i.e.
    it has no rows, no columns, or every cell is an empty string.
    """
    if not table_list or number_of_columns <= 0:
        panflute.debug("pantable: table is empty")
        return None
    # The +3 match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    width_abs = [
        3 + max(
            len(line)
            for row in table_list
            for line in row[column_index].split("\n")
        )
        for column_index in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # when all are 3 means all are empty, see comment above
    if width_tot == 3 * number_of_columns:
        panflute.debug("pantable: table is empty")
        return None
    return [
        each_width / width_tot * table_width
        for each_width in width_abs
    ]
141
142
143
def parse_alignment(alignment_string, number_of_columns):
    """
    `alignment` string is parsed into pandoc format (AlignDefault, etc.).

    Each character maps to one column: L, C, R, D (case-insensitive) for
    AlignLeft, AlignCenter, AlignRight, AlignDefault respectively.

    Cases are checked:

    - if not given (None or empty), return None (let panflute handle it)
    - if wrong type, return None after a debug message
    - if too long, truncate to the number of columns after a debug message
    - if invalid characters are given, use AlignDefault for those columns
      after a debug message
    - if too short, pad with AlignDefault
    """
    # alignment string can be None or empty; return None: set to default by
    # panflute
    if not alignment_string:
        return None
    if not isinstance(alignment_string, str):
        panflute.debug("pantable: alignment string is invalid")
        # return None: set to default by panflute
        return None
    # only strictly longer strings are truncated: a string with exactly
    # one character per column is valid and must not trigger the warning
    if len(alignment_string) > number_of_columns:
        alignment_string = alignment_string[:number_of_columns]
        panflute.debug(
            "pantable: alignment string is too long, truncated instead.")
    # parsing alignment
    align_names = {
        "l": "AlignLeft",
        "c": "AlignCenter",
        "r": "AlignRight",
        "d": "AlignDefault",
    }
    alignment = [align_names.get(i.lower()) for i in alignment_string]
    # debug if invalid; set to default
    if None in alignment:
        alignment = [(i if i is not None else "AlignDefault")
                     for i in alignment]
        panflute.debug(
            "pantable: alignment: invalid character found, default is used instead.")
    # fill up with default if too short (no-op when already long enough)
    alignment += ["AlignDefault"] * (number_of_columns - len(alignment))
    return alignment
193
194
195
def read_data(include, data):
    """
    Read CSV and return the table as a list of rows (lists of strings).

    When `include` is None the CSV text in `data` is parsed; otherwise
    `include` is taken as the path of a CSV file and `data` is ignored.

    Return None (after a debug message) when the include path cannot be
    read, e.g. the file does not exist.
    """
    if include is None:
        with io.StringIO(data) as file:
            return list(csv.reader(file))
    try:
        # newline='' is required by the csv module so that line breaks
        # embedded in quoted cells are passed through untouched
        with open(str(include), newline='') as file:
            return list(csv.reader(file))
    except FileNotFoundError:
        panflute.debug('{} {}'.format(
            "pantable: file not found from the path", include))
    except OSError:
        # e.g. the path is a directory or is not readable
        panflute.debug('{} {}'.format(
            "pantable: cannot read file from the path", include))
    return None
212
213
214
def regularize_table_list(raw_table_list):
    """
    When the length of rows are uneven, make it as long as the longest row.

    Short rows are padded with empty-string cells and a debug message is
    emitted; the input list is not modified. A table that is already
    regular is returned as is.

    Returns a tuple `(table_list, number_of_columns)`; an empty table
    gives `([], 0)`.
    """
    number_of_columns = max((len(row) for row in raw_table_list), default=0)
    # explicit check rather than `assert`, which is removed under -O
    if all(len(row) == number_of_columns for row in raw_table_list):
        return (raw_table_list, number_of_columns)
    table_list = [
        row + [''] * (number_of_columns - len(row))
        for row in raw_table_list
    ]
    panflute.debug(
        "pantable: table rows are of irregular length. Empty cells appended.")
    return (table_list, number_of_columns)
229
230
231
def parse_table_list(markdown, table_list):
    """
    Turn a table given as a list of rows (each a list of cell strings)
    into a list of panflute TableRow elements.

    When `markdown` is true each cell text is converted with
    panflute.convert_text; otherwise it is wrapped verbatim as Plain(Str).
    """
    def markdown_cell(text):
        return panflute.TableCell(*panflute.convert_text(text))

    def plain_cell(text):
        return panflute.TableCell(
            panflute.Plain(panflute.Str(text)))

    build_row = panflute.TableRow
    build_cell = markdown_cell if markdown else plain_cell

    rows = []
    for row in table_list:
        cells = [build_cell(text) for text in row]
        rows.append(build_row(*cells))
    return rows
243
244
245
def _parse_caption(options):
    """
    Return the `caption` option parsed as markdown into panflute inline
    elements, or None when the caption is missing, null or blank.
    """
    raw_caption = options.get('caption')
    if raw_caption is None:
        # `caption:` with no value must not become the literal text "None"
        return None
    caption_text = str(raw_caption)
    if not caption_text.strip():
        # a blank caption converts to no blocks at all, so there would be
        # no first element to take the content from
        return None
    return panflute.convert_text(caption_text)[0].content


def convert2table(options, data, **__):
    """
    provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` is the parsed YAML metadata of the code block and `data` is
    the remaining text of the block (the CSV).

    Returns

    - None when the include path is invalid (element kept as is),
    - [] when the table is empty (element deleted),
    - a panflute.Table otherwise.
    """
    # prepare table in list from data/include
    raw_table_list = read_data(options.get('include', None), data)
    # [] means delete the current element; None means kept as is
    if not raw_table_list:
        panflute.debug("pantable: table is empty or include is invalid")
        return raw_table_list
    # regularize table: all rows should have same length
    table_list, number_of_columns = regularize_table_list(raw_table_list)

    # parse width; auto-width when width is not specified or invalid
    width = get_width(options, number_of_columns)
    if width is None:
        width = auto_width(
            get_table_width(options), number_of_columns, table_list)
    # width remains None only when table is empty: delete the element
    if width is None:
        panflute.debug("pantable: table is empty")
        return []

    alignment = parse_alignment(
        options.get('alignment', None), number_of_columns)
    header = to_bool(options.get('header', True), True)
    markdown = to_bool(options.get('markdown', False), False)
    caption = _parse_caption(options)

    # parse list to panflute table
    table_body = parse_table_list(markdown, table_list)
    # extract header row; a single-row table never gets a header
    header_row = table_body.pop(0) if (
        len(table_body) > 1 and header
    ) else None
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
300
301
302
def main(_=None):
    """
    Run the filter: every fenced code block with class `table` is handed
    to panflute.yaml_filter, which calls the function convert2table above
    with the block's YAML options and data.
    """
    filter_settings = {
        'tag': 'table',
        'function': convert2table,
        'strict_yaml': True,
    }
    return panflute.run_filter(panflute.yaml_filter, **filter_settings)
313
314
# run as a pandoc filter only when executed as a script, not on import
if __name__ == "__main__":
    main()
316