Completed
Push — master ( 658df1...045d66 )
by Kolen
01:07
created

convert2table()   C

Complexity

Conditions 9

Size

Total Lines 55

Duplication

Lines 0
Ratio 0 %

Importance

Changes 17
Bugs 0 Features 0
Metric Value
cc 9
c 17
b 0
f 0
dl 0
loc 55
rs 5.4159

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
#!/usr/bin/env python3
2
3
r"""
4
Panflute filter to parse table in fenced YAML code blocks.
5
Currently only CSV table is supported.
6
7
7 metadata keys are recognized:
8
9
-   caption: the caption of the table. If omitted, no caption will be inserted.
10
-   alignment: a string of characters among L,R,C,D, case-insensitive,
11
        corresponds to Left-aligned, Right-aligned,
12
        Center-aligned, Default-aligned respectively.
13
    e.g. LCRD for a table with 4 columns
14
    default: DDD...
15
-   width: a list of relative widths corresponding to the width of each column.
16
    default: auto calculate from the length of each line in table cells.
17
-   table-width: the relative width of the table (e.g. relative to \linewidth).
18
    default: 1.0
19
-   header: If it has a header row. default: true
20
-   markdown: If CSV table cell contains markdown syntax. default: False
21
-   include: the path to a CSV file.
22
    If non-empty, override the CSV in the CodeBlock.
23
    default: None
24
25
When a metadata key is invalid, the default will be used instead.
26
Note that width and table-width accept fractions as well.
27
28
e.g.
29
30
```table
31
---
32
caption: '*Awesome* **Markdown** Table'
33
alignment: RC
34
table-width: 2/3
35
markdown: True
36
---
37
First row,defaulted to be header row,can be disabled
38
1,cell can contain **markdown**,"It can be arbitrary block element:
39
40
- following standard markdown syntax
41
- like this"
42
2,"Any markdown syntax, e.g.",$$E = mc^2$$
43
```
44
"""
45
46
import csv
47
from fractions import Fraction
48
import io
49
import panflute
50
51
52
# begin helper functions
53
def to_bool(to_be_bool, default=True):
    """
    Coerce an option value into a boolean.

    Booleans are returned unchanged. The strings "true"/"yes" and
    "false"/"no" (case-insensitive) map to `True` and `False` respectively.
    Any other value is reported through `panflute.debug` and replaced by
    `default`.
    """
    if isinstance(to_be_bool, bool):
        return to_be_bool
    try:
        lowered = to_be_bool.lower()
    except (TypeError, AttributeError):
        # not string-like (e.g. a number or None): falls through to default
        lowered = None
    if lowered in ("false", "no"):
        return False
    if lowered in ("true", "yes"):
        return True
    panflute.debug(
        "pantable: invalid boolean. "
        "Should be true/false/yes/no, case-insensitive. Default is used.")
    return default
72
73
74
def get_width(options, number_of_columns):
    """
    Return the column widths from `options['width']` as a list of floats.

    Each entry may be anything `fractions.Fraction` accepts
    (e.g. `0.5` or `"1/3"`). `None` is returned when the width is

    1. not given
    2. not a list (or tuple)
    3. of a length not equal to the number of columns
    4. containing a negative or unparsable entry (e.g. `"1/0"`)

    Cases 2-4 are also reported through `panflute.debug`.
    """
    if 'width' not in options:
        return None
    raw_width = options['width']
    width = None
    if (isinstance(raw_width, (list, tuple))
            and len(raw_width) == number_of_columns):
        try:
            parsed = [float(Fraction(x)) for x in raw_width]
        except (ValueError, TypeError, ZeroDivisionError, OverflowError):
            # Fraction raises ZeroDivisionError for "1/0" and OverflowError
            # for infinities; both are simply invalid widths here.
            parsed = None
        if parsed is not None and all(x >= 0 for x in parsed):
            width = parsed
    if width is None:
        panflute.debug("pantable: invalid width")
    return width
98
99
100
def get_table_width(options):
    """
    Return `options['table-width']` as a float.

    The value may be anything `fractions.Fraction` accepts
    (e.g. `0.5` or `"2/3"`). `1.0` is returned when the key is absent.
    When the value cannot be parsed (including `"1/0"` or an infinity) or
    is not strictly positive, it is reported through `panflute.debug` and
    `1.0` is returned.
    """
    if 'table-width' not in options:
        return 1.0
    try:
        table_width = float(Fraction(options['table-width']))
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        table_width = None
    if table_width is None or table_width <= 0:
        panflute.debug("pantable: invalid table-width")
        return 1.0
    return table_width
115
# end helper functions
116
117
118
def auto_width(table_width, number_of_columns, raw_table_list):
    """
    Calculate the column widths from the table content.

    The absolute width of a column is the length of the longest line found
    in any of its cells, plus 3. Each returned width is the column's share
    of the total, scaled by `table_width`.

    Return `None` (after reporting through `panflute.debug`) when the
    table is empty, i.e. it has no rows or every cell is an empty string.
    """
    # The +3 match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    width_abs = [
        3 + max(
            (len(line)
             for row in raw_table_list
             for line in row[i].split("\n")),
            default=0  # a table without rows counts as empty, not an error
        )
        for i in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # when all are 3 means all are empty, see comment above
    if width_tot == 3 * number_of_columns:
        panflute.debug("pantable: table is empty")
        return None
    return [w / width_tot * table_width for w in width_abs]
143
144
145
def parse_alignment(alignment_string, number_of_columns):
    """
    Parse the `alignment` string into pandoc format (AlignDefault, etc.).

    Each character among L, C, R, D (case-insensitive) maps to AlignLeft,
    AlignCenter, AlignRight, AlignDefault respectively. The result always
    has `number_of_columns` entries:

    -   a string that is too long is truncated (reported via debug),
    -   an invalid character becomes AlignDefault (reported via debug),
    -   a string that is too short is padded with AlignDefault.

    `None` (alignment not given) is treated as an empty string, so every
    column silently gets AlignDefault.
    """
    names = {
        "l": "AlignLeft",
        "c": "AlignCenter",
        "r": "AlignRight",
        "d": "AlignDefault",
    }
    # str(None) would be the 4 invalid characters "None"
    alignment_string = "" if alignment_string is None else str(alignment_string)
    # truncate and debug if too long
    if len(alignment_string) > number_of_columns:
        alignment_string = alignment_string[:number_of_columns]
        panflute.debug("pantable: alignment string is too long")
    # parsing
    alignment = [names.get(char.lower()) for char in alignment_string]
    # debug if invalid; set to default
    if None in alignment:
        alignment = [(i if i is not None else "AlignDefault")
                     for i in alignment]
        panflute.debug(
            "pantable: alignment: invalid character found, default is used instead.")
    # fill up with default if too short
    alignment += ["AlignDefault"] * (number_of_columns - len(alignment))
    return alignment
179
180
181
def read_data(include, data):
    """
    Read CSV and return the table as a list of rows (lists of strings).

    When `include` is `None` or an empty string, the CSV is read from the
    string `data`. Otherwise `include` is taken as the path to a CSV file
    that overrides `data`.

    Return `None` (after reporting through `panflute.debug`) when the
    included file does not exist or cannot be opened.
    """
    if include is None or include == "":
        with io.StringIO(data) as file:
            return list(csv.reader(file))
    path2file = str(include)
    try:
        with open(path2file) as file:
            return list(csv.reader(file))
    except FileNotFoundError:
        panflute.debug("pantable: file not found from the path", path2file)
    except OSError as error:
        # e.g. the path is a directory or the file is not readable
        panflute.debug(
            "pantable: cannot read the file from the path", path2file, error)
    return None
197
198
199
def regularize_table_list(raw_table_list):
    """
    Pad every row in place with empty strings up to the longest row.

    `raw_table_list` is modified in place and nothing is returned.
    An empty table is left untouched.
    """
    max_number_of_columns = max(map(len, raw_table_list), default=0)
    for row in raw_table_list:
        # a non-positive count multiplies to an empty list: nothing is added
        row.extend([''] * (max_number_of_columns - len(row)))
211
212
213
def parse_table_list(markdown, raw_table_list):
    """
    Turn the table (a list of rows of strings) into a list of
    `panflute.TableRow`.

    When `markdown` is true, each cell text is converted with
    `panflute.convert_text`; otherwise it is wrapped verbatim as
    `Plain(Str(text))`.
    """
    def make_cell(text):
        if markdown:
            return panflute.TableCell(*panflute.convert_text(text))
        return panflute.TableCell(panflute.Plain(panflute.Str(text)))

    rows = []
    for raw_row in raw_table_list:
        cells = [make_cell(text) for text in raw_row]
        rows.append(panflute.TableRow(*cells))
    return rows
228
229
230
def _parse_caption(options):
    """
    Return the caption parsed as markdown into panflute inline elements,
    or `None` when the caption is missing, null or blank.
    """
    caption = options.get('caption')
    if caption is None or not str(caption).strip():
        return None
    blocks = panflute.convert_text(str(caption))
    # guard against markdown that converts to no block at all
    return blocks[0].content if blocks else None


def convert2table(options, data, **__):
    """
    provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` holds the metadata keys and `data` the CSV text of the code
    block. Return

    -   a `panflute.Table` on success,
    -   `[]` when the table is empty (the element is deleted),
    -   `None` when `include` is invalid (the element is left unchanged).
    """
    # prepare table in list from data/include
    raw_table_list = read_data(options.get('include', None), data)
    # delete element if table is empty (by returning [])
    # element unchanged if include is invalid (by returning None)
    if not raw_table_list:
        panflute.debug("pantable: table is empty or include is invalid")
        return raw_table_list
    # regularize table: all rows should have same length
    regularize_table_list(raw_table_list)
    # preparation: get no of columns of the table
    number_of_columns = len(raw_table_list[0])

    # parse width; auto-width when width is not specified
    width = get_width(options, number_of_columns)
    if width is None:
        width = auto_width(
            get_table_width(options), number_of_columns, raw_table_list)
    # delete element if table is empty (by returning [])
    # width remains None only when table is empty
    if width is None:
        panflute.debug("pantable: table is empty")
        return []

    alignment = parse_alignment(
        options.get('alignment', None), number_of_columns)
    header = to_bool(options.get('header', True), True)
    markdown = to_bool(options.get('markdown', False), False)
    caption = _parse_caption(options)

    # parse list to panflute table
    table_body = parse_table_list(markdown, raw_table_list)
    # extract header row; a single-row table keeps its row as body
    header_row = table_body.pop(0) if (
        len(table_body) > 1 and header
    ) else None
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
286
287
288
def main(_=None):
    """
    Run the filter: fenced code blocks with class `table` are handed to
    panflute.yaml_filter, which calls the function convert2table on them.
    """
    filter_options = {
        'tag': 'table',
        'function': convert2table,
        'strict_yaml': True,
    }
    return panflute.run_filter(panflute.yaml_filter, **filter_options)
299
300
# run as a pandoc filter when executed as a script
if __name__ == "__main__":
    main()
302