Completed
Push — master ( d53e61...84d7b4 )
by Kolen
01:05
created

to_bool()   A

Complexity

Conditions 3

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 8
Bugs 0 Features 0
Metric Value
cc 3
c 8
b 0
f 0
dl 0
loc 19
rs 9.4285
1
#!/usr/bin/env python3
2
3
r"""
4
Panflute filter to parse table in fenced YAML code blocks.
5
Currently only CSV table is supported.
6
7
7 metadata keys are recognized:
8
9
-   caption: the caption of the table. If omitted, no caption will be inserted.
10
-   alignment: a string of characters among L,R,C,D, case-insensitive,
11
        corresponds to Left-aligned, Right-aligned,
12
        Center-aligned, Default-aligned respectively.
13
    e.g. LCRD for a table with 4 columns
14
    default: DDD...
15
-   width: a list of relative widths, one for each column.
16
    default: auto calculate from the length of each line in table cells.
17
-   table-width: the relative width of the table (e.g. relative to \linewidth).
18
    default: 1.0
19
-   header: If it has a header row. default: true
20
-   markdown: If CSV table cell contains markdown syntax. default: False
21
-   include: the path to a CSV file.
22
    If non-empty, override the CSV in the CodeBlock.
23
    default: None
24
25
When a metadata key is invalid, its default will be used instead.
26
Note that width and table-width accept fractions as well.
27
28
e.g.
29
30
```table
31
---
32
caption: '*Awesome* **Markdown** Table'
33
alignment: RC
34
table-width: 2/3
35
markdown: True
36
---
37
First row,defaulted to be header row,can be disabled
38
1,cell can contain **markdown**,"It can be arbitrary block element:
39
40
- following standard markdown syntax
41
- like this"
42
2,"Any markdown syntax, e.g.",$$E = mc^2$$
43
```
44
"""
45
46
import csv
47
import fractions
48
import io
49
import panflute
50
51
52
# begin helper functions
53
def to_bool(to_be_bool, default=True):
    """
    Coerce an option value into a boolean.

    A genuine boolean is passed through untouched. The strings
    "true"/"yes" and "false"/"no" (case-insensitive) map to `True` and
    `False` respectively. Any other value yields `default` and a debug
    message is emitted.
    """
    # already a boolean: hand it back as is
    if isinstance(to_be_bool, bool):
        return to_be_bool

    recognized = {"true": True, "yes": True,
                  "false": False, "no": False}
    try:
        return recognized[to_be_bool.lower()]
    except (KeyError, AttributeError):
        # KeyError: an unknown word; AttributeError: not a string at all
        panflute.debug("""pantable: invalid boolean. \
Should be true/false/yes/no, case-insensitive. Default is used.""")
        return default
72
73
74
def get_width(options, number_of_columns):
    """
    Read the `width` option and return it as a list of floats.

    Each entry may be a number or a fraction string such as "1/3".
    `None` is returned when `width` is

    1. not given (silently)
    2. not a list
    3. of a length not equal to the number of columns
    4. holding negative or unparsable entries (including "1/0", inf, nan)

    Cases 2-4 additionally emit a debug message.
    """
    try:
        raw_width = options['width']
    except KeyError:
        # not given: the caller falls back to auto-calculated widths
        return None

    try:
        # explicit checks instead of `assert`, which is stripped under -O
        if not isinstance(raw_width, (list, tuple)):
            raise TypeError("width must be a list")
        if len(raw_width) != number_of_columns:
            raise ValueError("width must have one entry per column")
        width = [float(fractions.Fraction(x)) for x in raw_width]
        if not all(i >= 0 for i in width):
            raise ValueError("width entries must be non-negative")
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: a fraction like "1/0"; OverflowError: infinity
        panflute.debug("pantable: invalid width")
        return None
    return width
96
97
98
def get_table_width(options):
    """
    Read the `table-width` option and return it as a positive float.

    The value may be a number or a fraction string such as "2/3".
    When it is missing, `1.0` is returned. When it is invalid
    (unparsable, non-positive, a zero denominator, infinite or nan),
    `1.0` is returned and a debug message is emitted.
    """
    try:
        table_width = float(fractions.Fraction(
            options.get('table-width', 1.0)))
        # explicit check instead of `assert`, which is stripped under -O
        if not table_width > 0:
            raise ValueError("table-width must be positive")
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: a fraction like "1/0"; OverflowError: infinity
        table_width = 1.0
        panflute.debug("pantable: invalid table-width")
    return table_width
110
# end helper functions
111
112
113
def auto_width(table_width, number_of_columns, table_list):
    """
    Calculate relative column widths from the cell contents.

    Used when `width` is not given in YAML. The absolute width of a
    column is the length of the longest line found in any of its cells
    plus 3. The widths are then scaled so that they sum to
    `table_width`.

    Returns a list of floats, or `None` (with a debug message) when
    every cell of the table is empty.
    """
    # The +3 match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    width_abs = [
        3 + max(
            (len(line)
             for row in table_list
             for line in row[column_index].split("\n")),
            default=0,  # no rows at all: treat the column as empty
        )
        for column_index in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # when all are 3 means all are empty, see comment above
    # (explicit check instead of `assert`, which is stripped under -O)
    if width_tot == 3 * number_of_columns:
        panflute.debug("pantable: table is empty")
        return None
    return [
        each_width / width_tot * table_width
        for each_width in width_abs
    ]
137
138
139
def parse_alignment(alignment_string, number_of_columns):
    """
    Parse the `alignment` string into pandoc format (AlignDefault, etc.).

    Each character (case-insensitive) stands for one column:
    L, C, R, D for left, center, right and default alignment.

    Cases handled:

    - not given or empty: return None (let panflute handle it)
    - wrong type: debug message, return None
    - too long: debug message, truncated to the number of columns
    - invalid character: debug message, return None
    - too short: padded with "AlignDefault"

    Returns a list of `number_of_columns` alignment names, or None.
    """
    # alignment string can be None or empty; return None: set to default by
    # panflute
    if not alignment_string:
        return None

    # explicit checks instead of `assert`, which is stripped under -O
    if not isinstance(alignment_string, str):
        panflute.debug("pantable: alignment string is invalid")
        # return None: set to default by panflute
        return None
    if len(alignment_string) > number_of_columns:
        alignment_string = alignment_string[:number_of_columns]
        panflute.debug(
            "pantable: alignment string is too long, truncated instead.")

    # parsing alignment
    align_dict = {'l': "AlignLeft",
                  'c': "AlignCenter",
                  'r': "AlignRight",
                  'd': "AlignDefault"}
    try:
        alignment = [align_dict[i.lower()] for i in alignment_string]
    except KeyError:
        panflute.debug(
            "pantable: alignment: invalid character found, default is used instead.")
        return None

    # fill up with default if too short (no-op when already long enough)
    alignment += ["AlignDefault"] * (number_of_columns - len(alignment))
    return alignment
190
191
192
def read_data(include, data):
    """
    Read CSV and return the table as a list of rows (lists of strings).

    When `include` is None or an empty string, the CSV text in `data`
    is parsed. Otherwise `include` is taken as the path to a CSV file
    which overrides `data`.

    Returns None (with a debug message) when the include path does not
    exist.
    """
    # an empty include does not override the CSV in the code block
    if include is None or include == '':
        with io.StringIO(data) as file:
            return list(csv.reader(file))

    try:
        # newline='' lets the csv module handle line endings itself, so
        # that newlines embedded in quoted cells are kept intact
        with open(str(include), newline='') as file:
            raw_table_list = list(csv.reader(file))
    except FileNotFoundError:
        raw_table_list = None
        panflute.debug('{} {}'.format(
            "pantable: file not found from the path", include))
    return raw_table_list
209
210
211
def regularize_table_list(raw_table_list):
    """
    Make every row as long as the longest row.

    Shorter rows are padded on the right with empty strings and a debug
    message is emitted. When all rows already have the same length the
    input list itself is returned.

    Returns a tuple `(table_list, number_of_columns)`; an empty table
    gives `([], 0)`.
    """
    length_of_rows = [len(row) for row in raw_table_list]
    # default=0 keeps an empty table from raising ValueError
    number_of_columns = max(length_of_rows, default=0)

    # explicit check instead of `assert`, which is stripped under -O
    if all(i == number_of_columns for i in length_of_rows):
        return (raw_table_list, number_of_columns)

    table_list = [
        row + [''] * (number_of_columns - len(row))
        for row in raw_table_list
    ]
    panflute.debug(
        "pantable: table rows are of irregular length. Empty cells appended.")
    return (table_list, number_of_columns)
226
227
228
def parse_table_list(markdown, table_list):
    """
    Turn a table given as a list of rows of strings into a list of
    panflute table rows.

    When `markdown` is true each cell text is converted with
    `panflute.convert_text`; otherwise the text is wrapped verbatim in
    a `Plain(Str(...))` cell.
    """
    make_row = panflute.TableRow

    def markdown_cell(text):
        # cell content parsed by panflute.convert_text
        return panflute.TableCell(*panflute.convert_text(text))

    def plain_cell(text):
        # cell content kept as literal text
        return panflute.TableCell(panflute.Plain(panflute.Str(text)))

    make_cell = markdown_cell if markdown else plain_cell

    rows = []
    for row in table_list:
        cells = [make_cell(text) for text in row]
        rows.append(make_row(*cells))
    return rows
240
241
242
def convert2table(options, data, **__):
    """
    Provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` is the parsed YAML metadata of the code block and `data`
    is the remaining text (the CSV). Extra keyword arguments are
    ignored.

    Returns

    - a `panflute.Table` on success,
    - `[]` when the table is empty (deletes the element),
    - `None` when the include path is invalid (element kept as is).
    """
    # prepare table in list from data/include
    raw_table_list = read_data(options.get('include', None), data)
    # explicit check instead of `assert`, which is stripped under -O
    if not raw_table_list:
        panflute.debug("pantable: table is empty or include is invalid")
        # [] means delete the current element; None means kept as is
        return raw_table_list

    # regularize table: all rows should have same length
    table_list, number_of_columns = regularize_table_list(raw_table_list)

    # parse width; auto-width when width is not specified or invalid
    width = get_width(options, number_of_columns)
    if width is None:
        width = auto_width(
            get_table_width(options), number_of_columns, table_list)
    # width remains None only when table is empty: delete the element
    if width is None:
        panflute.debug("pantable: table is empty")
        return []

    # parse alignment and the boolean switches
    alignment = parse_alignment(
        options.get('alignment', None), number_of_columns)
    header = to_bool(options.get('header', True), True)
    markdown = to_bool(options.get('markdown', False), False)

    # get caption: parsed as markdown into panflute AST if non-empty.
    # A `caption:` key with no value (None) or an empty string gives no
    # caption instead of the literal text "None" or an IndexError.
    caption = None
    raw_caption = options.get('caption')
    if raw_caption is not None and str(raw_caption) != '':
        parsed_caption = panflute.convert_text(str(raw_caption))
        if parsed_caption:
            caption = parsed_caption[0].content

    # parse list to panflute table
    table_body = parse_table_list(markdown, table_list)
    # extract header row; a single-row table keeps its row as body
    header_row = None
    if header and len(table_body) > 1:
        header_row = table_body.pop(0)

    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
295
296
297
def main(_=None):
    """
    Run the filter: fenced code blocks with class `table` are parsed
    using panflute.yaml_filter with the function convert2table above.
    """
    filter_settings = {
        'tag': 'table',
        'function': convert2table,
        'strict_yaml': True,
    }
    return panflute.run_filter(panflute.yaml_filter, **filter_settings)
308
309
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
311