Completed
Pull Request — master (#7)
by Kolen
01:17
created

read_data()   C

Complexity

Conditions 7

Size

Total Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 6
Bugs 0 Features 0
Metric Value
cc 7
c 6
b 0
f 0
dl 0
loc 21
rs 6.4705
1
r"""
2
Panflute filter to parse table in fenced YAML code blocks.
3
Currently only CSV table is supported.
4
5
7 metadata keys are recognized:
6
7
-   caption: the caption of the table. If omitted, no caption will be inserted.
8
-   alignment: a string of characters among L,R,C,D, case-insensitive,
9
        corresponds to Left-aligned, Right-aligned,
10
        Center-aligned, Default-aligned respectively.
11
    e.g. LCRD for a table with 4 columns
12
    default: DDD...
13
-   width: a list of relative width corresponding to the width of each columns.
14
    default: auto calculate from the length of each line in table cells.
15
-   table-width: the relative width of the table (e.g. relative to \linewidth).
16
    default: 1.0
17
-   header: If it has a header row. default: true
18
-   markdown: If CSV table cell contains markdown syntax. default: False
19
-   include: the path to a CSV file.
20
    If non-empty, override the CSV in the CodeBlock.
21
    default: None
22
23
When a metadata key is invalid, its default will be used instead.
24
Note that width and table-width accept fractions as well.
25
26
e.g.
27
28
```table
29
---
30
caption: '*Awesome* **Markdown** Table'
31
alignment: RC
32
table-width: 2/3
33
markdown: True
34
---
35
First row,defaulted to be header row,can be disabled
36
1,cell can contain **markdown**,"It can be arbitrary block element:
37
38
- following standard markdown syntax
39
- like this"
40
2,"Any markdown syntax, e.g.",$$E = mc^2$$
41
```
42
"""
43
44
import csv
45
import fractions
46
import io
47
import panflute
48
49
import sys
50
# True when the interpreter is Python 2; selects the py2-compatible code paths.
py2 = sys.version_info[0] == 2
# Base string type for isinstance checks: `basestring` on py2, `str` otherwise.
my_str = basestring if py2 else str
52
53
# begin helper functions
54
55
56
def to_bool(to_be_bool, default=True):
    """
    Coerce an option value into a boolean.

    - a real boolean is returned unchanged,
    - "true"/"yes" give `True` and "false"/"no" give `False`
      (case-insensitive),
    - anything else gives `default`, and a message is sent to
      `panflute.debug`.
    """
    if isinstance(to_be_bool, bool):
        return to_be_bool

    recognized = {"true": True, "yes": True,
                  "false": False, "no": False}
    try:
        return recognized[to_be_bool.lower()]
    except (KeyError, AttributeError):
        # KeyError: unrecognized word; AttributeError: value has no `.lower`
        panflute.debug(
            "pantable: invalid boolean. "
            "Should be true/false/yes/no, case-insensitive. Default is used.")
        return default
75
76
77
def get_width(options, number_of_columns):
    """
    Read the `width` option as a list of floats, one per column.

    Each entry is passed through `fractions.Fraction`, so numbers and
    fraction strings such as "1/3" are both accepted.

    Returns `None` when `width` is

    1. not given (silently),
    2. not a list,
    3. of a length different from `number_of_columns`,
    4. containing a negative entry,
    5. containing an entry that cannot be converted to a finite number
       (e.g. "abc", "1/0", infinity).

    In cases 2-5 a message is sent to `panflute.debug`.
    """
    try:
        # a missing key leaves immediately through `except KeyError`
        raw_width = options['width']
        # explicit checks instead of `assert`, which is stripped by `python -O`
        if not isinstance(raw_width, (list, tuple)):
            raise TypeError("width is not a list")
        if len(raw_width) != number_of_columns:
            raise ValueError("width has the wrong number of entries")
        width = [float(fractions.Fraction(entry)) for entry in raw_width]
        if any(entry < 0 for entry in width):
            raise ValueError("width has a negative entry")
    except KeyError:
        return None
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: "1/0"; OverflowError: infinite float
        panflute.debug("pantable: invalid width")
        return None
    return width
99
100
101
def get_table_width(options):
    """
    Read the `table-width` option as a positive float.

    The value is passed through `fractions.Fraction`, so numbers and
    fraction strings such as "2/3" are both accepted. When the key is
    missing, `1.0` is returned. When the value is invalid (not convertible,
    zero denominator, infinite, or not strictly positive), `1.0` is
    returned and a message is sent to `panflute.debug`.
    """
    try:
        table_width = float(fractions.Fraction(
            options.get('table-width', 1.0)))
        # explicit check instead of `assert`, which is stripped by `python -O`
        if table_width <= 0:
            raise ValueError("table-width must be positive")
    except (ValueError, TypeError, ZeroDivisionError, OverflowError):
        # ZeroDivisionError: "1/0"; OverflowError: infinite float
        table_width = 1.0
        panflute.debug("pantable: invalid table-width")
    return table_width
113
# end helper functions
114
115
116
def auto_width(table_width, number_of_columns, table_list):
    """
    Calculate the column widths when `width` is not given in YAML.

    The absolute width of a column is 3 plus the length of the longest
    line found in any cell of that column (cells are split on "\n").
    The widths are then scaled so that they sum to `table_width`.

    Returns a list of `number_of_columns` floats, or `None` (after a
    message to `panflute.debug`) when the table is empty, i.e. it has no
    rows or every cell is an empty string.
    """
    if not table_list:
        # no rows at all: nothing to measure
        panflute.debug("pantable: table is empty")
        return None

    # The +3 match the way pandoc handle width, see jgm/pandoc commit 0dfceda
    width_abs = [
        3 + max(
            len(line)
            for row in table_list
            for line in row[column_index].split("\n")
        )
        for column_index in range(number_of_columns)
    ]
    width_tot = sum(width_abs)
    # every column at exactly 3 means every cell is empty, see comment above
    if width_tot == 3 * number_of_columns:
        panflute.debug("pantable: table is empty")
        return None
    # float() forces true division: on Python 2, int / int would floor to 0
    return [
        float(each_width) / width_tot * table_width
        for each_width in width_abs
    ]
140
141
142
def parse_alignment(alignment_string, number_of_columns):
    """
    Parse the `alignment` string into a list of pandoc alignment names
    ("AlignLeft", "AlignCenter", "AlignRight", "AlignDefault").

    Each character (case-insensitive) maps to one column: L, C, R, D.

    Cases handled:

    - not given or empty: return `None` (let panflute handle it)
    - wrong type (not a string): debug message, return `None`
    - too long: debug message, truncated to `number_of_columns`
    - invalid character: debug message, return `None`
    - too short: padded with "AlignDefault"
    """
    # alignment string can be None or empty; return None: set to default by
    # panflute
    if not alignment_string:
        return None

    if not isinstance(alignment_string, my_str):
        panflute.debug("pantable: alignment string is invalid")
        # return None: set to default by panflute
        return None

    # explicit check instead of `assert`, which is stripped by `python -O`
    if len(alignment_string) > number_of_columns:
        alignment_string = alignment_string[:number_of_columns]
        panflute.debug(
            "pantable: alignment string is too long, truncated instead.")

    align_dict = {'l': "AlignLeft",
                  'c': "AlignCenter",
                  'r': "AlignRight",
                  'd': "AlignDefault"}
    try:
        alignment = [align_dict[char.lower()] for char in alignment_string]
    except KeyError:
        panflute.debug(
            "pantable: alignment: invalid character found, default is used instead.")
        return None

    # fill up with default if too short (a non-positive count adds nothing)
    alignment.extend(
        ["AlignDefault"] * (number_of_columns - len(alignment)))
    return alignment
193
194
195
def read_data(include, data):
    """
    Read CSV and return the table as a list of rows (lists of strings).

    When `include` is not None it is used as the path of a CSV file and
    `data` is ignored; otherwise `data` (the text of the code block) is
    parsed. Returns None, after a message to `panflute.debug`, when the
    include path cannot be opened or read.
    """
    if include is not None:
        try:
            with open(str(include)) as csv_source:
                rows = list(csv.reader(csv_source))
        except IOError:  # FileNotFoundError is not in Python2
            rows = None
            panflute.debug("pantable: file not found from the path", include)
        return rows

    # Python 2's csv module reads bytes, Python 3's reads text
    if py2:
        csv_buffer = io.BytesIO(data.encode('utf-8'))
    else:
        csv_buffer = io.StringIO(data)
    with csv_buffer as csv_source:
        return list(csv.reader(csv_source))
216
217
218
def regularize_table_list(raw_table_list):
    """
    When the length of rows are uneven, make it as long as the longest row.

    Returns a tuple `(table_list, number_of_columns)`. When every row
    already has the same length, `raw_table_list` itself is returned;
    otherwise a new list is built in which short rows are padded with
    empty strings, and a message is sent to `panflute.debug`.
    An empty (or None) input yields `([], 0)`.
    """
    if not raw_table_list:
        # max() below would raise on an empty sequence
        return ([], 0)

    number_of_columns = max(len(row) for row in raw_table_list)
    # explicit check instead of `assert`, which is stripped by `python -O`
    if all(len(row) == number_of_columns for row in raw_table_list):
        return (raw_table_list, number_of_columns)

    table_list = [
        row + [''] * (number_of_columns - len(row))
        for row in raw_table_list
    ]
    panflute.debug(
        "pantable: table rows are of irregular length. Empty cells appended.")
    return (table_list, number_of_columns)
233
234
235
def parse_table_list(markdown, table_list):
    """
    Convert a table given as a list of rows (lists of strings) into a list
    of `panflute.TableRow`.

    When `markdown` is true each cell text is converted with
    `panflute.convert_text`; otherwise the text is wrapped as-is in
    `Plain(Str(...))`.
    """
    make_row = panflute.TableRow

    def markdown_cell(text):
        return panflute.TableCell(*panflute.convert_text(text))

    def plain_cell(text):
        return panflute.TableCell(panflute.Plain(panflute.Str(text)))

    make_cell = markdown_cell if markdown else plain_cell

    table_body = []
    for row in table_list:
        cells = [make_cell(text) for text in row]
        table_body.append(make_row(*cells))
    return table_body
247
248
249
def convert2table(options, data, **__):
    """
    provided to panflute.yaml_filter to parse its content as pandoc table.

    `options` is the parsed YAML metadata of the code block and `data` is
    the remaining text (the CSV). Returns

    - a `panflute.Table` on success,
    - `[]` when the table is empty (deletes the current element),
    - `None` when the include path is invalid (element kept as is).
    """
    # prepare table in list from data/include
    raw_table_list = read_data(options.get('include', None), data)
    # [] means delete the current element; None means kept as is.
    # Explicit check instead of `assert`, which is stripped by `python -O`.
    if not raw_table_list:
        panflute.debug("pantable: table is empty or include is invalid")
        return raw_table_list
    # regularize table: all rows should have same length
    table_list, number_of_columns = regularize_table_list(raw_table_list)

    # parse width; auto-width when width is not specified or invalid
    width = get_width(options, number_of_columns)
    if width is None:
        width = auto_width(
            get_table_width(options), number_of_columns, table_list)
    # width remains None only when table is empty: delete the element
    if width is None:
        panflute.debug("pantable: table is empty")
        return []

    # parse alignment
    alignment = parse_alignment(
        options.get('alignment', None), number_of_columns)
    header = to_bool(options.get('header', True), True)
    markdown = to_bool(options.get('markdown', False), False)

    # get caption: parsed as markdown into panflute AST if non-empty.
    # A null caption (`caption:` with no value) or one that converts to no
    # block at all means "no caption" rather than the text "None" or a crash.
    caption = None
    raw_caption = options.get('caption', None)
    if raw_caption is not None:
        # keep strings untouched: str() on non-ASCII unicode fails on py2
        caption_text = (
            raw_caption if isinstance(raw_caption, my_str)
            else str(raw_caption))
        caption_blocks = panflute.convert_text(caption_text)
        if caption_blocks:
            caption = caption_blocks[0].content

    # parse list to panflute table
    table_body = parse_table_list(markdown, table_list)
    # extract header row; a single-row table keeps its only row as body
    header_row = None
    if header and len(table_body) > 1:
        header_row = table_body.pop(0)
    return panflute.Table(
        *table_body,
        caption=caption,
        alignment=alignment,
        width=width,
        header=header_row
    )
302
303
304
def main(_=None):
    """
    Run the filter: fenced code blocks with class `table` are parsed by
    panflute.yaml_filter using the function convert2table above.
    """
    filter_options = {
        'tag': 'table',
        'function': convert2table,
        'strict_yaml': True,
    }
    return panflute.run_filter(panflute.yaml_filter, **filter_options)
315
316
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
318