Completed
Push — master ( 5a265b...54824a )
by Kolen
01:09
created

get_width()   C

Complexity

Conditions 8

Size

Total Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 9
Bugs 1 Features 0
Metric Value
cc 8
c 9
b 1
f 0
dl 0
loc 22
rs 5.2631
1
#!/usr/bin/env python3
2
3
r"""
4
Panflute filter to parse table in fenced YAML code blocks.
5
Currently only CSV table is supported.
6
7
7 metadata keys are recognized:
8
9
-   caption: the caption of the table. If omitted, no caption will be inserted.
10
-   alignment: a string of characters among L,R,C,D, case-insensitive,
11
        corresponds to Left-aligned, Right-aligned,
12
        Center-aligned, Default-aligned respectively.
13
    e.g. LCRD for a table with 4 columns
14
    default: DDD...
15
-   width: a list of relative widths corresponding to the width of each column.
16
    default: auto calculate from the length of each line in table cells.
17
-   table-width: the relative width of the table (e.g. relative to \linewidth).
18
    default: 1.0
19
-   header: If it has a header row. default: true
20
-   markdown: If CSV table cell contains markdown syntax. default: False
21
-   include: the path to a CSV file.
22
    If non-empty, override the CSV in the CodeBlock.
23
    default: None
24
25
When a metadata key is invalid, the default will be used instead.
26
Note that width and table-width accept fractions as well.
27
28
e.g.
29
30
```table
31
---
32
caption: '*Awesome* **Markdown** Table'
33
alignment: RC
34
table-width: 2/3
35
markdown: True
36
---
37
First row,defaulted to be header row,can be disabled
38
1,cell can contain **markdown**,"It can be arbitrary block element:
39
40
- following standard markdown syntax
41
- like this"
42
2,"Any markdown syntax, e.g.",$$E = mc^2$$
43
```
44
"""
45
46
import csv
47
import fractions
48
import io
49
import panflute
50
51
52
# begin helper functions
53
def to_bool(to_be_bool, default=True):
    """
    Coerce a YAML option value into a boolean.

    A value that already is a boolean is returned untouched. Otherwise the
    value is lowercased and matched: "true"/"yes" give `True`,
    "false"/"no" give `False`. Any other value (an unrecognized string, or
    something that cannot be lowercased) yields `default` and a debug
    message is emitted.
    """
    # guard clause: nothing to convert
    if isinstance(to_be_bool, bool):
        return to_be_bool

    recognized = {"true": True, "yes": True, "false": False, "no": False}
    try:
        parsed = recognized.get(to_be_bool.lower())
    except (ValueError, TypeError, AttributeError):
        # e.g. numbers and None have no usable lower()
        parsed = None

    if parsed is None:
        panflute.debug(
            "pantable: invalid boolean. "
            "Should be true/false/yes/no, case-insensitive. Default is used.")
        return default
    return parsed
73
74
75
def get_width(options, number_of_columns):
    """
    Read the per-column relative widths from the YAML options.

    Each entry of `options['width']` may be a number or a string accepted
    by `fractions.Fraction` (e.g. "1/3"); entries are returned as floats.

    Returns `None` when `width` is

    1. not given (silently, no debug message)
    2. not a list
    3. of a length not equal to the number of columns
    4. holding a negative entry, or an entry that is not a finite number
       (this includes a zero denominator such as "1/0")

    Cases 2-4 also emit a debug message.
    """
    try:
        # if width not exists, exits immediately through except KeyError
        raw_width = options['width']
        # a string or mapping of the right length must not pass as a list
        if not isinstance(raw_width, (list, tuple)):
            raise TypeError
        if len(raw_width) != number_of_columns:
            raise ValueError
        width = [float(fractions.Fraction(entry)) for entry in raw_width]
        if any(entry < 0 for entry in width):
            raise ValueError
    except KeyError:
        return None
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: "1/0"; OverflowError: infinity or huge values
        panflute.debug("pantable: invalid width")
        return None
    return width
97
98
99
def get_table_width(options):
    """
    Read the relative width of the whole table from the YAML options.

    `options['table-width']` may be a number or a string accepted by
    `fractions.Fraction` (e.g. "2/3"); it defaults to `1.0` when absent.

    `table-width` is set to `1.0`, with a debug message, if the value is
    invalid: not convertible to a number, not strictly positive, not finite,
    or a fraction with a zero denominator.
    """
    try:
        table_width = float(fractions.Fraction(
            options.get('table-width', 1.0)))
        # explicit check instead of assert, which is stripped under -O
        if not table_width > 0:
            raise ValueError
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: "1/0"; OverflowError: infinity or huge values
        table_width = 1.0
        panflute.debug("pantable: invalid table-width")
    return table_width
111
# end helper functions
112
113
114
def auto_width(table_width, number_of_columns, table_list):
    """
    Auto-calculate the column widths; used when `width` is not given in YAML.

    The absolute width of a column is 3 plus the length of the longest line
    found in any cell of that column (cells are split on "\n"). The returned
    list holds each column's share of the total, scaled by `table_width`.

    Returns `None`, with a debug message, when the table is empty, i.e. when
    every column has the minimal absolute width of 3 (or there is no column).
    """
    # The +3 match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    width_abs = [
        3 + max(
            len(line)
            for row in table_list
            for line in row[column_index].split("\n")
        )
        for column_index in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # when all are 3 means all are empty, see comment above.
    # Checked explicitly: an assert would be stripped under -O.
    if width_tot == 3 * number_of_columns:
        panflute.debug("pantable: table is empty")
        return None
    return [each_width / width_tot * table_width for each_width in width_abs]
138
139
140
def parse_alignment(alignment_string, number_of_columns):
    """
    `alignment` string is parsed into pandoc format (AlignDefault, etc.).

    Each character (case-insensitive) maps to one column: L, C, R, D give
    AlignLeft, AlignCenter, AlignRight, AlignDefault respectively.

    Cases are checked:

    - if not given (None or empty), return None (let panflute handle it)
    - if wrong type (not a string), return None with a debug message
    - if too long, truncate to the number of columns with a debug message
    - if invalid characters are given, return None with a debug message
    - if too short, pad with AlignDefault
    """
    # alignment string can be None or empty; return None: set to default by
    # panflute
    if not alignment_string:
        return None

    # test valid type
    if not isinstance(alignment_string, str):
        panflute.debug("pantable: alignment string is invalid")
        # return None: set to default by panflute
        return None

    # truncate and debug if too long (explicit check: an assert would be
    # stripped under -O and leave more alignments than columns)
    if len(alignment_string) > number_of_columns:
        alignment_string = alignment_string[:number_of_columns]
        panflute.debug(
            "pantable: alignment string is too long, truncated instead.")

    # parsing alignment
    align_dict = {'l': "AlignLeft",
                  'c': "AlignCenter",
                  'r': "AlignRight",
                  'd': "AlignDefault"}
    try:
        alignment = [align_dict[char.lower()] for char in alignment_string]
    except KeyError:
        panflute.debug(
            "pantable: alignment: invalid character found, default is used instead.")
        return None

    # fill up with default if too short (a non-positive count adds nothing)
    alignment.extend(["AlignDefault"] * (number_of_columns - len(alignment)))

    return alignment
191
192
193
def read_data(include, data):
    """
    read csv and return the table in list (one list of cell strings per row).

    When `include` is None the CSV is read from the string `data`;
    otherwise `data` is ignored and the CSV is read from the file at the
    path `include`.

    Return None, with a debug message, when the include path is invalid
    (missing file, or a path that cannot be opened or read).
    """
    if include is None:
        with io.StringIO(data) as file:
            return list(csv.reader(file))

    try:
        # newline='' as required by the csv module, so that line breaks
        # embedded in quoted cells are handed to the reader untouched
        with open(str(include), newline='') as file:
            return list(csv.reader(file))
    except FileNotFoundError:
        panflute.debug('{} {}'.format(
            "pantable: file not found from the path", include))
    except OSError as error:
        # e.g. the path is a directory or is not readable
        panflute.debug('{} {}: {}'.format(
            "pantable: cannot read the file from the path", include, error))
    return None
210
211
212
def regularize_table_list(raw_table_list):
    """
    When the length of rows are uneven, make it as long as the longest row.

    Returns a tuple `(table_list, number_of_columns)`. When all rows already
    have the same length, `table_list` is `raw_table_list` itself; otherwise
    it is a new list whose short rows are padded with empty strings, and a
    debug message is emitted. An empty table gives `([], 0)`.
    """
    # default=0 so that a table without rows does not make max() raise
    number_of_columns = max((len(row) for row in raw_table_list), default=0)
    # explicit check: an assert would be stripped under -O and leave the
    # rows uneven
    if all(len(row) == number_of_columns for row in raw_table_list):
        return (raw_table_list, number_of_columns)
    table_list = [
        row + [''] * (number_of_columns - len(row)) for row in raw_table_list]
    panflute.debug(
        "pantable: table rows are of irregular length. Empty cells appended.")
    return (table_list, number_of_columns)
227
228
229
def parse_table_list(markdown, table_list):
    """
    Turn the table (a list of rows of cell strings) into a list of
    panflute.TableRow, one per row.

    When `markdown` is true, each cell string is passed through
    panflute.convert_text and the result becomes the cell content;
    otherwise the string is wrapped verbatim as Plain(Str(...)).
    """
    build_row = panflute.TableRow

    def markdown_cell(text):
        # cell content is whatever convert_text returns for the text
        return panflute.TableCell(*panflute.convert_text(text))

    def plain_cell(text):
        # cell content is the raw text, no parsing
        return panflute.TableCell(panflute.Plain(panflute.Str(text)))

    build_cell = markdown_cell if markdown else plain_cell

    rows = []
    for row in table_list:
        cells = [build_cell(text) for text in row]
        rows.append(build_row(*cells))
    return rows
241
242
243
def convert2table(options, data, **__):
    """
    provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` is the mapping of metadata keys and `data` is the CSV text of
    the code block. Extra keyword arguments are ignored.

    Returns

    - a panflute.Table on success
    - [] (delete the current element) when the table is empty
    - None (element kept as is) when the include path is invalid
    """
    # prepare table in list from data/include
    raw_table_list = read_data(options.get('include', None), data)
    # [] means delete the current element; None means kept as is
    if not raw_table_list:
        panflute.debug("pantable: table is empty or include is invalid")
        return raw_table_list
    # regularize table: all rows should have same length
    table_list, number_of_columns = regularize_table_list(raw_table_list)

    # parse width; auto-width when width is not specified or invalid
    width = get_width(options, number_of_columns)
    if width is None:
        width = auto_width(
            get_table_width(options), number_of_columns, table_list)
    # width remains None only when table is empty: delete the element
    if width is None:
        panflute.debug("pantable: table is empty")
        return []

    # parse alignment
    alignment = parse_alignment(
        options.get('alignment', None), number_of_columns)
    header = to_bool(options.get('header', True), True)
    markdown = to_bool(options.get('markdown', False), False)

    # get caption: parsed as markdown into panflute AST if non-empty.
    # A missing or null caption, or one that parses to nothing, gives no
    # caption instead of failing on the [0] lookup.
    caption = None
    caption_source = options.get('caption')
    if caption_source is not None:
        caption_blocks = panflute.convert_text(str(caption_source))
        if caption_blocks:
            caption = caption_blocks[0].content

    # parse list to panflute table
    table_body = parse_table_list(markdown, table_list)
    # extract header row; a single-row table keeps its row in the body
    header_row = None
    if header and len(table_body) > 1:
        header_row = table_body.pop(0)
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
298
299
300
def main(_=None):
    """
    Run the filter: fenced code block with class table will be parsed using
    panflute.yaml_filter with the function convert2table.

    The single argument is accepted but not used.
    """
    yaml_filter_settings = {
        'tag': 'table',
        'function': convert2table,
        'strict_yaml': True,
    }
    return panflute.run_filter(panflute.yaml_filter, **yaml_filter_settings)
311
312
# run the filter only when this file is executed as a script
if __name__ == "__main__":
    main()
314