auto_width()   C
last analyzed

Complexity

Conditions 7

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 5
Bugs 1 Features 0
Metric Value
cc 7
c 5
b 1
f 0
dl 0
loc 24
rs 5.5
1
r"""
2
Panflute filter to parse table in fenced YAML code blocks.
3
Currently only CSV table is supported.
4
5
7 metadata keys are recognized:
6
7
-   caption: the caption of the table. If omitted, no caption will be inserted.
8
-   alignment: a string of characters among L,R,C,D, case-insensitive,
9
        corresponds to Left-aligned, Right-aligned,
10
        Center-aligned, Default-aligned respectively.
11
    e.g. LCRD for a table with 4 columns
12
    default: DDD...
13
-   width: a list of relative widths corresponding to the width of each column.
14
    default: auto calculate from the length of each line in table cells.
15
-   table-width: the relative width of the table (e.g. relative to \linewidth).
16
    default: 1.0
17
-   header: If it has a header row. default: true
18
-   markdown: If CSV table cell contains markdown syntax. default: False
19
-   include: the path to a CSV file.
20
    If non-empty, override the CSV in the CodeBlock.
21
    default: None
22
23
When a metadata key is invalid, the default will be used instead.
24
Note that width and table-width accept fractions as well.
25
26
e.g.
27
28
```table
29
---
30
caption: '*Awesome* **Markdown** Table'
31
alignment: RC
32
table-width: 2/3
33
markdown: True
34
---
35
First row,defaulted to be header row,can be disabled
36
1,cell can contain **markdown**,"It can be an arbitrary block element:
37
38
- following standard markdown syntax
39
- like this"
40
2,"Any markdown syntax, e.g.",$$E = mc^2$$
41
```
42
"""
43
44
import fractions
45
import io
46
import panflute
47
48
import sys
49
py2 = sys.version_info[0] == 2
50
51
if py2:
52
    from backports import csv
53
else:
54
    import csv
55
56
# begin helper functions
57
58
59
def get_width(options, number_of_columns):
    """
    Read the per-column relative widths from the `width` option.

    Every entry may be anything `fractions.Fraction` accepts (a number or a
    string such as "1/3") and is converted to `float`.

    Returns the list of widths, or `None` when `width` is

    1. not given (silently, no debug message)
    2. not a list
    3. of a length different from `number_of_columns`
    4. holding a negative entry or an entry that cannot be parsed
       (including a fraction with a zero denominator)

    Cases 2-4 also emit a debug message.
    """
    try:
        # if width not exists, exits immediately through except KeyError
        raw_width = options['width']
        # explicit checks instead of `assert`, which is removed under `-O`
        if not isinstance(raw_width, (list, tuple)):
            raise TypeError("width is not a list")
        if len(raw_width) != number_of_columns:
            raise ValueError("width does not match the number of columns")
        width = [float(fractions.Fraction(x)) for x in raw_width]
        if any(i < 0 for i in width):
            raise ValueError("width has negative entries")
    except KeyError:
        return None
    # Fraction raises ZeroDivisionError for "1/0" and OverflowError for inf
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        panflute.debug("pantable: invalid width")
        return None
    return width
81
82
83
def get_table_width(options):
    """
    Read the relative width of the whole table from the `table-width` option.

    The value may be anything `fractions.Fraction` accepts (a number or a
    string such as "2/3") and is converted to `float`.

    Returns `1.0` when the option is missing. When the value cannot be
    parsed (including a zero denominator) or is not strictly positive,
    a debug message is emitted and `1.0` is returned.
    """
    try:
        table_width = float(fractions.Fraction(
            options.get('table-width', 1.0)))
        # explicit check instead of `assert`, which is removed under `-O`
        if table_width <= 0:
            raise ValueError("table-width must be positive")
    # Fraction raises ZeroDivisionError for "1/0" and OverflowError for inf
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        table_width = 1.0
        panflute.debug("pantable: invalid table-width")
    return table_width
95
# end helper functions
96
97
98
def auto_width(table_width, number_of_columns, table_list):
    """
    Calculate the column widths from the table content.

    Used when `width` is not given in YAML. The absolute width of a column
    is 3 plus the length of the longest line found in any cell of that
    column; the returned relative widths are these absolute widths scaled
    so that they sum to `table_width`.

    `table_list` is a list of rows, each row a list of at least
    `number_of_columns` strings.

    Returns `None` (after a debug message) when the table is empty, i.e.
    there are no rows, no columns, or every cell is an empty string.
    """
    # no rows would make max() below fail; no columns means nothing to size
    if not table_list or number_of_columns <= 0:
        panflute.debug("pantable: table is empty")
        return None
    # calculate width
    # The +3 match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    width_abs = [
        3 + max(
            len(line)
            for row in table_list
            for line in row[column_index].split("\n")
        )
        for column_index in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # when all are 3 means all are empty, see comment above
    if width_tot == 3 * number_of_columns:
        panflute.debug("pantable: table is empty")
        return None
    # float() forces true division: on Python 2 int / int would floor to 0
    return [
        each_width / float(width_tot) * table_width
        for each_width in width_abs
    ]
122
123
124
def parse_alignment(alignment_string, number_of_columns):
    """
    `alignment` string is parsed into pandoc format (AlignDefault, etc.).

    Each character of `alignment_string` (case-insensitive) maps to one
    column: L, C, R, D for left, center, right and default alignment.

    Returns a list of `number_of_columns` alignment names, or `None` to let
    panflute use its default. Cases handled:

    - not given or empty: return None
    - wrong type (not a string): debug message, return None
    - too long: debug message, extra characters are dropped
    - invalid character: debug message, return None
    - too short: remaining columns are filled with "AlignDefault"
    """
    # alignment string can be None or empty; return None: set to default by
    # panflute
    if not alignment_string:
        return None

    # test valid type; `basestring` only exists on Python 2, where it covers
    # both str and unicode
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if not isinstance(alignment_string, string_types):
        panflute.debug("pantable: alignment string is invalid")
        # return None: set to default by panflute
        return None

    # truncate and debug if too long (explicit check instead of `assert`,
    # which is removed under `-O`)
    if len(alignment_string) > number_of_columns:
        alignment_string = alignment_string[:number_of_columns]
        panflute.debug(
            "pantable: alignment string is too long, truncated instead.")

    # parsing alignment
    align_dict = {'l': "AlignLeft",
                  'c': "AlignCenter",
                  'r': "AlignRight",
                  'd': "AlignDefault"}
    try:
        alignment = [align_dict[i.lower()] for i in alignment_string]
    except KeyError:
        panflute.debug(
            "pantable: alignment: invalid character found, default is used instead.")
        return None

    # fill up with default if too short (a non-positive count adds nothing)
    alignment += ["AlignDefault"] * (number_of_columns - len(alignment))

    return alignment
176
177
178
def read_data(include, data):
    """
    Parse the CSV source and return it as a list of rows.

    When `include` is None the CSV text in `data` is parsed. Otherwise
    `include` is used as the path of a CSV file to read and `data` is
    ignored.

    Returns the rows as produced by `csv.reader`, or `None` (after a debug
    message) when the include file cannot be opened or read.
    """
    if include is not None:
        try:
            with io.open(str(include)) as csv_file:
                return list(csv.reader(csv_file))
        except IOError:  # FileNotFoundError is not in Python2
            panflute.debug("pantable: file not found from the path", include)
            return None
    with io.StringIO(data) as csv_buffer:
        return list(csv.reader(csv_buffer))
194
195
196
def regularize_table_list(raw_table_list):
    """
    Make every row as long as the longest row.

    `raw_table_list` is a list of rows, each row a list of cells. Rows that
    are shorter than the longest one are padded with empty strings and a
    debug message is emitted; the input rows themselves are not modified.

    Returns a tuple `(table_list, number_of_columns)`. When all rows already
    have the same length, `table_list` is the input list itself. An empty
    input gives `([], 0)`.
    """
    # max() of an empty sequence would raise
    if not raw_table_list:
        return ([], 0)
    number_of_columns = max(len(row) for row in raw_table_list)
    # explicit check instead of `assert`, which is removed under `-O`
    if all(len(row) == number_of_columns for row in raw_table_list):
        return (raw_table_list, number_of_columns)
    table_list = [
        row + [''] * (number_of_columns - len(row))
        for row in raw_table_list
    ]
    panflute.debug(
        "pantable: table rows are of irregular length. Empty cells appended.")
    return (table_list, number_of_columns)
211
212
213
def parse_table_list(markdown, table_list):
    """
    Turn a table given as a list of rows of strings into panflute rows.

    When `markdown` is true every cell string is passed through
    `panflute.convert_text` and the result becomes the cell content;
    otherwise the string is wrapped as-is in a single `Plain(Str(...))`.

    Returns a list with one `panflute.TableRow` per row of `table_list`.
    """
    def markdown_cell(text):
        return panflute.TableCell(*panflute.convert_text(text))

    def plain_cell(text):
        return panflute.TableCell(panflute.Plain(panflute.Str(text)))

    # pick the cell builder once, outside the loops
    make_cell = markdown_cell if markdown else plain_cell
    make_row = panflute.TableRow

    rows = []
    for row in table_list:
        cells = [make_cell(text) for text in row]
        rows.append(make_row(*cells))
    return rows
225
226
227
def convert2table(options, data, **__):
    """
    provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` is the mapping of metadata keys (caption, alignment, width,
    table-width, header, markdown, include) and `data` is the CSV text of
    the code block. Any further keyword arguments are ignored.

    Returns

    - a `panflute.Table` on success
    - `[]` when the table is empty (deletes the current element)
    - `None` when the include path is invalid (element kept as is)
    """
    # prepare table in list from data/include
    raw_table_list = read_data(options.get('include', None), data)
    # delete element if table is empty (by returning [])
    # element unchanged if include is invalid (by returning None)
    # (explicit check instead of `assert`, which is removed under `-O`)
    if not raw_table_list:
        panflute.debug("pantable: table is empty or include is invalid")
        # [] means delete the current element; None means kept as is
        return raw_table_list
    # regularize table: all rows should have same length
    table_list, number_of_columns = regularize_table_list(raw_table_list)

    # parse width
    width = get_width(options, number_of_columns)
    # auto-width when width is not specified
    if width is None:
        width = auto_width(
            get_table_width(options), number_of_columns, table_list)
    # delete element if table is empty (by returning [])
    # width remains None only when table is empty
    if width is None:
        panflute.debug("pantable: table is empty")
        return []
    # parse alignment
    alignment = parse_alignment(
        options.get('alignment', None), number_of_columns)
    header = options.get('header', True)
    markdown = options.get('markdown', False)

    # get caption: parsed as markdown into panflute AST if non-empty.
    # A null caption is treated as omitted rather than rendered as "None".
    caption = None
    raw_caption = options.get('caption', None)
    if raw_caption is not None:
        caption_blocks = panflute.convert_text(str(raw_caption))
        # guard against a conversion that yields no block at all
        # (presumably the case for an empty caption string)
        if caption_blocks:
            caption = caption_blocks[0].content
    # parse list to panflute table
    table_body = parse_table_list(markdown, table_list)
    # extract header row; a single-row table keeps its row as body
    header_row = table_body.pop(0) if (
        len(table_body) > 1 and header
    ) else None
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
280
281
282
def main(doc=None):
    """
    Run the filter: fenced code blocks with class `table` are handed to
    panflute.yaml_filter, which calls the function convert2table above.

    `doc` is passed through to panflute.run_filter unchanged, and the
    result of panflute.run_filter is returned.
    """
    filter_arguments = dict(
        tag='table',
        function=convert2table,
        strict_yaml=True,
        doc=doc,
    )
    return panflute.run_filter(panflute.yaml_filter, **filter_arguments)
294
295
if __name__ == '__main__':
296
    main()
297