Completed
Push — master ( 21567f...6fc5da )
by Kolen
01:12
created

parse_width()   C

Complexity

Conditions 7

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 7
c 3
b 0
f 0
dl 0
loc 24
rs 5.5
1
#!/usr/bin/env python3
2
3
r"""
4
Panflute filter to parse table in fenced YAML code blocks.
5
Currently only CSV table is supported.
6
7
7 metadata keys are recognized:
8
9
-   caption: the caption of the table. If omitted, no caption will be inserted.
10
-   alignment: a string of characters among L,R,C,D, case-insensitive,
11
        corresponds to Left-aligned, Right-aligned,
12
        Center-aligned, Default-aligned respectively.
13
    e.g. LCRD for a table with 4 columns
14
    default: DDD...
15
-   width: a list of relative widths, one for each column.
16
    default: auto calculate from the length of each line in table cells.
17
-   table-width: the relative width of the table (e.g. relative to \linewidth).
18
    default: 1.0
19
-   header: If it has a header row. default: true
20
-   markdown: If CSV table cell contains markdown syntax. default: False
21
-   include: the path to a CSV file.
22
    If non-empty, override the CSV in the CodeBlock.
23
    default: None
24
25
When a metadata key is invalid, the default will be used instead.
26
27
e.g.
28
29
```table
30
---
31
caption: '*Awesome* **Markdown** Table'
32
alignment: RC
33
table-width: 0.7
34
markdown: True
35
---
36
First row,defaulted to be header row,can be disabled
37
1,cell can contain **markdown**,"It can be arbitrary block element:
38
39
- following standard markdown syntax
40
- like this"
41
2,"Any markdown syntax, e.g.",$$E = mc^2$$
42
```
43
"""
44
45
import csv
46
import io
47
import os
48
import panflute
49
50
51
# begin helper functions
52
def to_bool(to_be_bool, default=True):
    """
    Coerce an option value into a boolean.

    A real boolean is returned untouched. Any other value is converted to
    a lower-cased string: "false"/"no" give `False`, "true"/"yes" give
    `True`. Anything else emits a debug message and yields `default`.
    """
    if isinstance(to_be_bool, bool):
        return to_be_bool
    text = str(to_be_bool).lower()
    if text in ("false", "no"):
        return False
    if text in ("true", "yes"):
        return True
    # unrecognised spelling: report it and fall back to the caller's default
    panflute.debug("""pantable: invalid boolean. \
Should be true/false/yes/no, case-insensitive.""")
    return default
68
69
70
def get_width(options):
    """
    Read the `width` option as a list of non-negative floats.

    Returns `None` when the key is absent. Also returns `None`, after a
    debug message, when the value cannot be iterated, an item cannot be
    converted to float, or an item fails the `>= 0` check.
    """
    if 'width' not in options:
        return None
    try:
        width = [float(item) for item in options['width']]
        for item in width:
            # negated form on purpose: NaN fails `>= 0` and is rejected too
            if not item >= 0:
                raise ValueError
    except (ValueError, TypeError):
        panflute.debug("pantable: invalid width")
        return None
    return width
86
87
88
def get_table_width(options):
    """
    Read the `table-width` option as a positive float.

    Returns `1.0` when the key is absent. Also returns `1.0`, after a
    debug message, when the value cannot be converted to float or is
    less than or equal to zero.
    """
    fallback = 1.0
    if 'table-width' not in options:
        return fallback
    try:
        table_width = float(options['table-width'])
        if table_width <= 0:
            raise ValueError
    except (ValueError, TypeError):
        panflute.debug("pantable: invalid table-width")
        return fallback
    return table_width
103
# end helper functions
104
105
106
def get_include(options):
    """
    Read the `include` option as a path to an existing file.

    Returns `None` when the key is absent. When the value, converted to
    a string, does not name an existing regular file, a debug message is
    emitted and `None` is returned.
    """
    if 'include' not in options:
        return None
    path = str(options.get('include'))
    if os.path.isfile(path):
        return path
    panflute.debug("pantable: invalid path from 'include'")
    return None
118
119
120
def parse_width(options, raw_table_list, number_of_columns):
    """
    Return the relative width of each column, or `None` for an empty table.

    A valid `width` from the options is returned unchanged. Otherwise the
    width of each column is taken as the length of the longest line found
    in any cell of that column, normalised so that all columns add up to
    `table-width`.

    `None` is returned, after a debug message, when the auto-calculated
    total width is zero. This covers tables whose cells are all empty as
    well as tables with no columns or no rows.
    """
    width = get_width(options)
    # always read table-width, so an invalid value is reported even when
    # an explicit width makes it unused
    table_width = get_table_width(options)
    if width is not None:
        return width
    # longest line per column; default=0 covers a table without rows
    width_abs = [
        max(
            (
                len(line)
                for row in raw_table_list
                for line in row[i].split("\n")
            ),
            default=0
        )
        for i in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # explicit check: with zero columns no division would ever happen,
    # so relying on ZeroDivisionError would let an empty table through
    if width_tot == 0:
        panflute.debug("pantable: table has zero total width")
        return None
    return [each_width / width_tot * table_width for each_width in width_abs]
144
145
146
def parse_alignment(options, number_of_columns):
    """
    Translate the `alignment` option into a list of pandoc alignment names.

    Returns `None` when the option is absent or `None`. Otherwise the
    value is converted to a string and each character (case-insensitive)
    is mapped: l, c, r, d to AlignLeft, AlignCenter, AlignRight,
    AlignDefault. A string longer than `number_of_columns` is truncated,
    unknown characters become AlignDefault (both with a debug message),
    and a short string is padded with AlignDefault.
    """
    alignment = options.get('alignment', None)
    if alignment is None:
        return None
    letters = str(alignment)
    # truncate and debug if too long
    if len(letters) > number_of_columns:
        letters = letters[:number_of_columns]
        panflute.debug("pantable: alignment string is too long")
    names = {
        "l": "AlignLeft",
        "c": "AlignCenter",
        "r": "AlignRight",
        "d": "AlignDefault",
    }
    # unknown letters map to None so they can be detected below
    parsed = [names.get(letter.lower()) for letter in letters]
    if None in parsed:
        parsed = [
            "AlignDefault" if name is None else name
            for name in parsed
        ]
        panflute.debug("pantable: alignment string is invalid")
    # fill up with default if too short
    shortfall = number_of_columns - len(parsed)
    if shortfall > 0:
        parsed.extend(["AlignDefault"] * shortfall)
    return parsed
175
176
177
def read_data(include, data):
    """
    Parse CSV into a list of rows (each row a list of strings).

    The CSV is read from the file at `include` when it is not `None`,
    otherwise from the string `data`.
    """
    # NOTE(review): the csv module documentation recommends opening files
    # with newline=''; left as-is here to keep behaviour unchanged.
    source = io.StringIO(data) if include is None else open(include)
    with source as stream:
        return list(csv.reader(stream))
188
189
190
def regularize_table_list(raw_table_list):
    """
    Pad short rows, in place, so every row is as long as the longest row.

    Missing cells are filled with empty strings. The list is modified in
    place and nothing is returned. An empty table list is left untouched
    instead of raising `ValueError`.
    """
    # default=0 keeps max() from raising when there are no rows at all
    max_number_of_columns = max(
        (len(row) for row in raw_table_list),
        default=0
    )
    for row in raw_table_list:
        missing_number_of_columns = max_number_of_columns - len(row)
        if missing_number_of_columns > 0:
            row.extend([''] * missing_number_of_columns)
202
203
204
def parse_table_list(markdown, raw_table_list):
    """
    Turn a table given as a list of rows into a list of panflute TableRow.

    When `markdown` is true, each cell text is passed through
    `panflute.convert_text` and the result becomes the cell content.
    Otherwise the text is wrapped verbatim in a `Plain(Str(...))`.
    """
    def markdown_cell(text):
        return panflute.TableCell(*panflute.convert_text(text))

    def plain_cell(text):
        return panflute.TableCell(panflute.Plain(panflute.Str(text)))

    make_cell = markdown_cell if markdown else plain_cell
    return [
        panflute.TableRow(*[make_cell(text) for text in row])
        for row in raw_table_list
    ]
222
223
224
def convert2table(options, data, **__):
    """
    provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` is the mapping of YAML metadata keys and `data` is the CSV
    text of the code block. Extra keyword arguments are ignored.

    Returns a `panflute.Table`, or an empty list when the table has no
    rows or no usable width.
    """
    # prepare table in list from data/include
    raw_table_list = read_data(get_include(options), data)
    # check empty table
    if not raw_table_list:
        panflute.debug("pantable: table is empty")
        return []
    # regularize table: all rows should have same length
    regularize_table_list(raw_table_list)
    # preparation: get no of columns of the table
    number_of_columns = len(raw_table_list[0])

    # parse width
    width = parse_width(
        options, raw_table_list, number_of_columns)
    # check empty table
    if width is None:
        panflute.debug("pantable: table is empty")
        return []
    # parse alignment
    alignment = parse_alignment(options, number_of_columns)
    header = to_bool(options.get('header', True), True)
    markdown = to_bool(options.get('markdown', False), False)

    # get caption: parsed as markdown into panflute AST if non-empty.
    # A null caption is treated as omitted rather than rendered as the
    # text "None". A caption that converts to no blocks (presumably an
    # empty or whitespace-only string) is skipped, because indexing [0]
    # on it would raise IndexError.
    caption = None
    raw_caption = options.get('caption')
    if raw_caption is not None:
        parsed_caption = panflute.convert_text(str(raw_caption))
        if parsed_caption:
            caption = parsed_caption[0].content
    # parse list to panflute table
    table_body = parse_table_list(markdown, raw_table_list)
    # extract header row: only when at least one body row would remain
    header_row = table_body.pop(0) if (
        len(table_body) > 1 and header
    ) else None
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
268
269
270
def main(_=None):
    """
    Run the filter: fenced code blocks with class `table` are handed to
    panflute.yaml_filter, which calls the function convert2table above.

    The positional argument is accepted but not used.
    """
    filter_arguments = {
        'tag': 'table',
        'function': convert2table,
        'strict_yaml': True,
    }
    return panflute.run_filter(panflute.yaml_filter, **filter_arguments)


if __name__ == '__main__':
    main()
284