Passed
Push — master ( 1f4cff...5351aa )
by Kolen
01:06
created

to_bool()   A

Complexity

Conditions 3

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 6
Bugs 0 Features 0
Metric Value
cc 3
c 6
b 0
f 0
dl 0
loc 19
rs 9.4285
1
r"""
Panflute filter to parse table in fenced YAML code blocks.
Currently only CSV table is supported.

7 metadata keys are recognized:

-   caption: the caption of the table. If omitted, no caption will be inserted.
-   alignment: a string of characters among L,R,C,D, case-insensitive,
    corresponding to Left-aligned, Right-aligned,
    Center-aligned, Default-aligned respectively.
    e.g. LCRD for a table with 4 columns
    default: DDD...
-   width: a list of relative widths corresponding to the width of each column.
    default: auto calculate from the length of each line in table cells.
-   table-width: the relative width of the table (e.g. relative to \linewidth).
    default: 1.0
-   header: If it has a header row. default: true
-   markdown: If CSV table cell contains markdown syntax. default: False
-   include: the path to a CSV file.
    If non-empty, override the CSV in the CodeBlock.
    default: None

When a metadata key is invalid, the default will be used instead.
Note that width and table-width accept fractions as well.

e.g.

```table
---
caption: '*Awesome* **Markdown** Table'
alignment: RC
table-width: 2/3
markdown: True
---
First row,defaulted to be header row,can be disabled
1,cell can contain **markdown**,"It can be arbitrary block element:

- following standard markdown syntax
- like this"
2,"Any markdown syntax, e.g.",$$E = mc^2$$
```
"""
43
44
import csv
45
import fractions
46
import io
47
import panflute
48
49
import sys
50
# True when the interpreter's major version is 2; used below to pick the
# Python 2 string type (`basestring`) and a bytes-based buffer for `csv`.
py2 = sys.version_info[0] == 2
51
52
# begin helper functions
53
54
55
def get_width(options, number_of_columns):
    """
    Return the user-specified relative column widths as a list of floats.

    `options` is the parsed YAML metadata mapping and `number_of_columns`
    is the number of columns in the table. Each entry of `options['width']`
    may be a number or a fraction string such as ``'1/3'``.

    `None` is returned when `width` is

    1. not given (silently)
    2. not a list
    3. of length not equal to the number of columns
    4. containing negative or unparsable entries (e.g. ``'1/0'``, infinity)

    Cases 2-4 additionally emit a debug message.
    """
    # an absent key is not an error: the caller falls back to auto width
    if 'width' not in options:
        return None
    raw_width = options['width']
    try:
        # explicit checks instead of `assert`, which is stripped under -O
        if not isinstance(raw_width, (list, tuple)):
            raise TypeError("width is not a list")
        if len(raw_width) != number_of_columns:
            raise ValueError("width has the wrong number of entries")
        # Fraction accepts ints, floats and strings like '2/3'
        width = [float(fractions.Fraction(x)) for x in raw_width]
        if not all(i >= 0 for i in width):
            raise ValueError("width has negative entries")
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: '1/0'; OverflowError: infinite float entries
        panflute.debug("pantable: invalid width")
        return None
    return width
77
78
79
def get_table_width(options):
    """
    Return the relative `table-width` from `options` as a float.

    The value may be a number or a fraction string such as ``'2/3'``.
    Falls back to `1.0` (with a debug message) when the value cannot be
    parsed, is not strictly positive, or is not finite. When the key is
    absent the default `1.0` is used silently.
    """
    try:
        table_width = float(
            fractions.Fraction(options.get('table-width', 1.0)))
        # explicit check instead of `assert`, which is stripped under -O
        if not table_width > 0:
            raise ValueError("table-width must be positive")
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: '1/0'; OverflowError: infinite float value
        table_width = 1.0
        panflute.debug("pantable: invalid table-width")
    return table_width
91
# end helper functions
92
93
94
def auto_width(table_width, number_of_columns, table_list):
    """
    Compute relative column widths from the cell contents.

    Used when `width` is not given in the YAML metadata. For every column
    the longest line found in any of its cells (cells are split on
    newlines) is measured; the widths are proportional to these lengths
    and sum to `table_width`.

    `table_list` is a list of rows, each a list of at least
    `number_of_columns` strings.

    Returns a list of floats, or `None` (with a debug message) when the
    table is empty, i.e. has no rows or only empty cells.
    """
    # no rows at all: nothing to measure (max() of nothing would raise)
    if not table_list:
        panflute.debug("pantable: table is empty")
        return None
    # The +3 match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    width_abs = [
        3 + max(
            len(line)
            for row in table_list
            for line in row[column_index].split("\n")
        )
        for column_index in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # when all are 3 means all are empty, see comment above
    if width_tot == 3 * number_of_columns:
        panflute.debug("pantable: table is empty")
        return None
    # float() keeps this a true division under Python 2 as well
    return [
        float(each_width) / width_tot * table_width
        for each_width in width_abs
    ]
118
119
120
def parse_alignment(alignment_string, number_of_columns):
    """
    Parse the `alignment` string into a list of pandoc alignment names.

    Each character among L, C, R, D (case-insensitive) maps to
    "AlignLeft", "AlignCenter", "AlignRight", "AlignDefault".

    Cases handled:

    - not given / empty: return `None` (let panflute handle it)
    - wrong type: debug message, return `None`
    - too long: truncated to `number_of_columns`, debug message
    - invalid characters: debug message, return `None`
    - too short: padded with "AlignDefault"

    Returns a list of `number_of_columns` strings, or `None`.
    """
    # alignment string can be None or empty; return None: set to default by
    # panflute
    if not alignment_string:
        return None

    # `basestring` only exists on Python 2; fall back to `str` elsewhere
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if not isinstance(alignment_string, string_types):
        panflute.debug("pantable: alignment string is invalid")
        # return None: set to default by panflute
        return None

    # truncate and debug if too long (plain `if`: an `assert` here would be
    # stripped under -O and leave the string untruncated)
    if len(alignment_string) > number_of_columns:
        alignment_string = alignment_string[:number_of_columns]
        panflute.debug(
            "pantable: alignment string is too long, truncated instead.")

    # parsing alignment
    align_dict = {'l': "AlignLeft",
                  'c': "AlignCenter",
                  'r': "AlignRight",
                  'd': "AlignDefault"}
    try:
        alignment = [align_dict[i.lower()] for i in alignment_string]
    except KeyError:
        panflute.debug(
            "pantable: alignment: invalid character found, default is used instead.")
        return None

    # fill up with default if too short
    alignment += ["AlignDefault"] * (number_of_columns - len(alignment))

    return alignment
172
173
174
def read_data(include, data):
    """
    Read CSV and return the table as a list of rows (lists of strings).

    `include` is the optional path to a CSV file; when it is non-empty the
    file is read and `data` is ignored. Otherwise `data`, the CSV text
    from the code block, is parsed.

    Returns `None` (with a debug message) when the include path cannot be
    opened.
    """
    if include:
        # NOTE(review): the csv docs recommend opening with newline='' on
        # Python 3; left as-is to keep newline handling unchanged.
        try:
            with open(str(include)) as csv_file:
                return list(csv.reader(csv_file))
        except IOError:  # FileNotFoundError is not in Python2
            panflute.debug("pantable: file not found from the path", include)
            return None

    # None or empty include: parse the inline CSV text
    if py2:
        # the Python 2 csv module works on byte strings
        data = data.encode('utf-8')
    io_universal = io.BytesIO if py2 else io.StringIO
    with io_universal(data) as csv_buffer:
        return list(csv.reader(csv_buffer))
193
194
195
def regularize_table_list(raw_table_list):
    """
    Make every row as long as the longest row.

    `raw_table_list` is a list of rows (lists of strings). Short rows are
    padded with empty strings and a debug message is emitted; when all
    rows already have the same length the input list itself is returned.

    Returns a tuple `(table_list, number_of_columns)`; an empty input
    yields `([], 0)`.
    """
    # nothing to measure: max() of an empty list would raise
    if not raw_table_list:
        return ([], 0)
    number_of_columns = max(len(row) for row in raw_table_list)
    # plain check instead of `assert`, which is stripped under -O and would
    # leave ragged rows unpadded
    if all(len(row) == number_of_columns for row in raw_table_list):
        return (raw_table_list, number_of_columns)
    table_list = [
        row + [''] * (number_of_columns - len(row))
        for row in raw_table_list
    ]
    panflute.debug(
        "pantable: table rows are of irregular length. Empty cells appended.")
    return (table_list, number_of_columns)
210
211
212
def parse_table_list(markdown, table_list):
    """
    Convert a table given as a list of rows into panflute table rows.

    When `markdown` is truthy every cell string is passed through
    `panflute.convert_text` and the result becomes the cell content;
    otherwise the cell string is wrapped verbatim as
    `Plain(Str(...))`.

    Returns a list with one `panflute.TableRow` per row of `table_list`.
    """
    if markdown:
        def to_table_cell(text):
            # cell text is parsed by panflute.convert_text
            return panflute.TableCell(*panflute.convert_text(text))
    else:
        def to_table_cell(text):
            # cell text is kept as a single plain string
            return panflute.TableCell(panflute.Plain(panflute.Str(text)))
    return [
        panflute.TableRow(*[to_table_cell(cell) for cell in row])
        for row in table_list
    ]
224
225
226
def convert2table(options, data, **__):
    """
    Provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` is the parsed YAML metadata of the code block and `data` is
    the remaining text of the block (inline CSV). Extra keyword arguments
    are accepted and ignored.

    Returns

    - a `panflute.Table` on success,
    - `[]` when the table is empty (deletes the current element),
    - `None` when the include path is invalid (element kept as is).
    """
    # prepare table in list from data/include
    raw_table_list = read_data(options.get('include', None), data)
    # delete element if table is empty (by returning [])
    # element unchanged if include is invalid (by returning None)
    if not raw_table_list:
        panflute.debug("pantable: table is empty or include is invalid")
        # [] means delete the current element; None means kept as is
        return raw_table_list
    # regularize table: all rows should have same length
    table_list, number_of_columns = regularize_table_list(raw_table_list)

    # Initialize the `options` output from `panflute.yaml_filter`
    # parse width
    width = get_width(options, number_of_columns)
    # auto-width when width is not specified
    if width is None:
        width = auto_width(
            get_table_width(options), number_of_columns, table_list)
    # delete element if table is empty (by returning [])
    # width remains None only when table is empty
    if width is None:
        panflute.debug("pantable: table is empty")
        return []
    # parse alignment
    alignment = parse_alignment(
        options.get('alignment', None), number_of_columns)
    header = options.get('header', True)
    markdown = options.get('markdown', False)

    # get caption: parsed as markdown into panflute AST if non-empty.
    # A null caption (`caption:` with no value) is treated as absent rather
    # than rendered as the text "None"; an empty parse result is guarded so
    # indexing the first block cannot fail.
    caption = None
    raw_caption = options.get('caption', None)
    if raw_caption is not None:
        caption_blocks = panflute.convert_text(str(raw_caption))
        if caption_blocks:
            caption = caption_blocks[0].content
    # parse list to panflute table
    table_body = parse_table_list(markdown, table_list)
    # extract header row: only when requested and a body row would remain
    header_row = None
    if header and len(table_body) > 1:
        header_row = table_body.pop(0)
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
279
280
281
def main(_=None):
    """
    Run the filter.

    Fenced code blocks with class `table` are parsed using
    panflute.yaml_filter with the function convert2table above.
    The single positional argument is accepted and ignored.
    Returns whatever `panflute.run_filter` returns.
    """
    return panflute.run_filter(
        panflute.yaml_filter,
        tag='table',
        function=convert2table,
        strict_yaml=True
    )
292
293
# Script entry point: run the filter when executed directly.
if __name__ == '__main__':
    main()
295