Completed
Push — master ( 6fc5da...7be251 )
by Kolen
01:32
created

parse_width()   F

Complexity

Conditions 9

Size

Total Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 9
c 3
b 0
f 0
dl 0
loc 28
rs 3
1
#!/usr/bin/env python3
2
3
r"""
4
Panflute filter to parse table in fenced YAML code blocks.
5
Currently only CSV table is supported.
6
7
7 metadata keys are recognized:
8
9
-   caption: the caption of the table. If omitted, no caption will be inserted.
10
-   alignment: a string of characters among L,R,C,D, case-insensitive,
11
        corresponds to Left-aligned, Right-aligned,
12
        Center-aligned, Default-aligned respectively.
13
    e.g. LCRD for a table with 4 columns
14
    default: DDD...
15
-   width: a list of relative width corresponding to the width of each columns.
16
    default: auto calculate from the length of each line in table cells.
17
-   table-width: the relative width of the table (e.g. relative to \linewidth).
18
    default: 1.0
19
-   header: If it has a header row. default: true
20
-   markdown: If CSV table cell contains markdown syntax. default: False
21
-   include: the path to a CSV file.
22
    If non-empty, override the CSV in the CodeBlock.
23
    default: None
24
25
When a metadata key is invalid, its default is used instead.
26
27
e.g.
28
29
```table
30
---
31
caption: '*Awesome* **Markdown** Table'
32
alignment: RC
33
table-width: 0.7
34
markdown: True
35
---
36
First row,defaulted to be header row,can be disabled
37
1,cell can contain **markdown**,"It can be arbitrary block element:
38
39
- following standard markdown syntax
40
- like this"
41
2,"Any markdown syntax, e.g.",$$E = mc^2$$
42
```
43
"""
44
45
import csv
46
import io
47
import os
48
import panflute
49
50
51
# begin helper functions
52
def to_bool(to_be_bool, default=True):
    """
    Coerce a YAML-ish value to a boolean.

    - A real `bool` is returned untouched.
    - "false"/"no" give `False` and "true"/"yes" give `True`; the match is
      done on the lower-cased `str()` of the value.
    - Anything else logs a debug message and yields `default`.
    """
    if isinstance(to_be_bool, bool):
        return to_be_bool
    lowered = str(to_be_bool).lower()
    if lowered in ("false", "no"):
        return False
    if lowered in ("true", "yes"):
        return True
    # unrecognized spelling: report it and fall back to the caller's default
    panflute.debug(
        "pantable: invalid boolean. "
        "Should be true/false/yes/no, case-insensitive."
    )
    return default
68
69
70
def get_width(options):
    """
    Read the `width` option as a list of non-negative floats.

    Returns `None` when the key is absent, or (after a debug message) when
    the value cannot be iterated, an entry cannot be converted with
    `float()`, or an entry fails the `>= 0` test.
    """
    if 'width' not in options:
        return None
    try:
        widths = [float(entry) for entry in options['width']]
        # `not entry >= 0` (rather than `entry < 0`) also rejects NaN
        if any(not entry >= 0 for entry in widths):
            raise ValueError
    except (ValueError, TypeError):
        panflute.debug("pantable: invalid width")
        return None
    return widths
86
87
88
def get_table_width(options):
    """
    Read the `table-width` option as a float.

    Returns `1.0` when the key is absent, or (after a debug message) when
    the value cannot be converted with `float()` or is `<= 0`.
    """
    fallback = 1.0
    if 'table-width' not in options:
        return fallback
    try:
        ratio = float(options['table-width'])
        if ratio <= 0:
            raise ValueError
    except (ValueError, TypeError):
        panflute.debug("pantable: invalid table-width")
        return fallback
    return ratio
103
# end helper functions
104
105
106
def get_include(options):
    """
    Read the `include` option as a path to an existing file.

    Returns `None` when the key is absent, or (after a debug message) when
    the `str()` of the value is not an existing regular file.
    """
    if 'include' not in options:
        return None
    path = str(options.get('include'))
    if os.path.isfile(path):
        return path
    panflute.debug("pantable: invalid path from 'include'")
    return None
118
119
120
def parse_width(options, raw_table_list, number_of_columns):
    """
    Return the list of relative column widths, or `None` for an empty table.

    - If `options` carries a valid `width` list with exactly
      `number_of_columns` entries, that list is returned unchanged
      (`table-width` is not applied to it).
    - Otherwise the widths are auto-calculated from the longest line found
      in each column of `raw_table_list` and scaled so that they sum to
      `table-width`. A `width` list of the wrong length is reported and
      ignored, in line with "invalid keys fall back to the default".
    - If every cell is empty, a debug message is emitted and `None` is
      returned.

    `raw_table_list` is expected to be regular: every row must have at least
    `number_of_columns` cells.
    """
    width = get_width(options)
    # read unconditionally so that an invalid `table-width` is always reported
    table_width = get_table_width(options)

    if width is not None:
        if len(width) == number_of_columns:
            return width
        panflute.debug(
            "pantable: width has wrong number of columns; "
            "auto-calculating instead")

    # longest line (cells may span several lines) per column
    longest_line = [
        max(
            (len(line)
             for row in raw_table_list
             for line in row[column].split("\n")),
            default=0
        )
        for column in range(number_of_columns)
    ]
    if sum(longest_line) == 0:
        panflute.debug("pantable: table is empty")
        return None

    # match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    padded = [length + 3 for length in longest_line]
    total = sum(padded)
    return [length / total * table_width for length in padded]
148
149
150
def parse_alignment(options, number_of_columns):
    """
    Translate the `alignment` option into pandoc alignment names.

    Returns `None` when the option is absent or `None`. Otherwise the
    `str()` of the option is read one character per column
    (case-insensitive L, C, R, D) and a list of "AlignLeft", "AlignCenter",
    "AlignRight" and "AlignDefault" is returned:

    - a string longer than `number_of_columns` is truncated (with a debug
      message);
    - unknown characters become "AlignDefault" (with a debug message);
    - a string that is too short is padded with "AlignDefault".
    """
    spec = options.get('alignment', None)
    if spec is None:
        return None
    spec = str(spec)

    # truncate before parsing, so characters that are cut off are never
    # reported as invalid
    if len(spec) > number_of_columns:
        spec = spec[:number_of_columns]
        panflute.debug("pantable: alignment string is too long")

    names = {
        "l": "AlignLeft",
        "c": "AlignCenter",
        "r": "AlignRight",
        "d": "AlignDefault",
    }
    parsed = [names.get(letter.lower()) for letter in spec]

    if None in parsed:
        parsed = [
            name if name is not None else "AlignDefault"
            for name in parsed
        ]
        panflute.debug("pantable: alignment string is invalid")

    shortfall = number_of_columns - len(parsed)
    if shortfall > 0:
        parsed.extend(["AlignDefault"] * shortfall)
    return parsed
179
180
181
def read_data(include, data):
    """
    Parse CSV into a list of rows (each row a list of strings).

    The CSV is read from the file at `include` when it is not `None`;
    otherwise from the string `data`.
    """
    source = open(include) if include is not None else io.StringIO(data)
    with source as stream:
        return list(csv.reader(stream))
192
193
194
def regularize_table_list(raw_table_list):
    """
    Pad every row, in place, to the length of the longest row.

    Short rows are extended with empty strings; nothing is returned.
    An empty `raw_table_list` is left untouched instead of raising.
    """
    # `default=0` keeps `max` from raising ValueError on an empty table
    max_number_of_columns = max(
        (len(row) for row in raw_table_list),
        default=0
    )
    for row in raw_table_list:
        missing_number_of_columns = max_number_of_columns - len(row)
        if missing_number_of_columns > 0:
            # extend in place: callers rely on the rows being mutated
            row.extend([''] * missing_number_of_columns)
206
207
208
def parse_table_list(markdown, raw_table_list):
    """
    Turn a list of rows of strings into a list of `panflute.TableRow`.

    With `markdown` truthy each cell text goes through
    `panflute.convert_text` and the result is unpacked into the cell;
    otherwise the text is wrapped verbatim as `Plain(Str(text))`.
    """
    def as_cell(text):
        if markdown:
            return panflute.TableCell(*panflute.convert_text(text))
        return panflute.TableCell(panflute.Plain(panflute.Str(text)))

    return [
        panflute.TableRow(*[as_cell(text) for text in row])
        for row in raw_table_list
    ]
226
227
228
def convert2table(options, data, **__):
    """
    Build a `panflute.Table` from the YAML `options` and CSV `data` of a
    fenced code block; provided to `panflute.yaml_filter`.

    The CSV comes from the `include` file when that option names an existing
    file, otherwise from `data`. An empty list is returned (after a debug
    message) when there are no rows or when the table has zero total width.
    Extra keyword arguments are accepted and ignored.
    """
    # prepare table in list from data/include
    raw_table_list = read_data(get_include(options), data)
    # check empty table
    if not raw_table_list:
        panflute.debug("pantable: table is empty")
        return []
    # regularize table: all rows should have same length
    regularize_table_list(raw_table_list)
    # preparation: get no of columns of the table
    number_of_columns = len(raw_table_list[0])

    # parse width
    width = parse_width(options, raw_table_list, number_of_columns)
    # check empty table
    if width is None:
        panflute.debug("pantable: table is empty")
        return []
    # parse alignment
    alignment = parse_alignment(options, number_of_columns)
    header = to_bool(options.get('header', True), True)
    markdown = to_bool(options.get('markdown', False), False)

    # get caption: parsed as markdown into panflute AST if non-empty.
    # A missing or null caption, or one that converts to no block at all
    # (e.g. an empty string), means "no caption" rather than an IndexError
    # or a literal "None".
    caption = None
    raw_caption = options.get('caption')
    if raw_caption is not None:
        caption_blocks = panflute.convert_text(str(raw_caption))
        if caption_blocks:
            caption = caption_blocks[0].content

    # parse list to panflute table
    table_body = parse_table_list(markdown, raw_table_list)
    # extract header row: a single-row table keeps its only row as body
    if header and len(table_body) > 1:
        header_row = table_body.pop(0)
    else:
        header_row = None
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
272
273
274
def main(_=None):
    """
    Run the filter: fenced code blocks with class `table` are handed to
    `panflute.yaml_filter`, which calls the function `convert2table`.

    The positional argument is accepted but unused.
    """
    filter_options = {
        'tag': 'table',
        'function': convert2table,
        'strict_yaml': True,
    }
    return panflute.run_filter(panflute.yaml_filter, **filter_options)
285
286
# Run the filter only when this file is executed as a script.
if __name__ == '__main__':
    main()
288