doorstop.core.exporter._tabulate()   F
last analyzed

Complexity

Conditions 20

Size

Total Lines 82
Code Lines 50

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 50
dl 0
loc 82
rs 0
c 0
b 0
f 0
cc 20
nop 3

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Complexity

Complex functions like doorstop.core.exporter._tabulate() often do a lot of different things. To break such a function down, we need to identify a cohesive component within it. A common approach to find such a component is to look for names (variables, fields or methods) that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# SPDX-License-Identifier: LGPL-3.0-only
2
3
"""Functions to export documents and items."""
4
5
import datetime
6
import os
7
from collections import defaultdict
8
from typing import Any, Dict
9
10
import openpyxl
11
import yaml
12
13
from doorstop import common, settings
14
from doorstop.common import DoorstopError
15
from doorstop.core.types import iter_documents, iter_items
16
17
LIST_SEP = "\n"  # string separating list values when joined in a string
18
19
XLSX_MAX_WIDTH = 65.0  # maximum width for a column
20
XLSX_FILTER_PADDING = 3.5  # column padding to account for filter button
21
22
log = common.logger(__name__)
23
24
25
def export(obj, path, ext=None, **kwargs):
    """Export an object to a file or to a directory of files.

    Two calling conventions are supported:

    1. document or item-like object + output file path
    2. tree-like object + output directory path

    :param obj: (1) Item, list of Items, Document or (2) Tree
    :param path: (1) output file path or (2) output directory path
    :param ext: file extension to override output extension
    :param kwargs: extra options forwarded to the format-specific exporter

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: output location if files created, else None

    """
    # Format precedence: explicit override, then the path's suffix, then CSV
    ext = ext or os.path.splitext(path)[-1] or ".csv"
    check(ext)

    # Write every (object, path) pair produced for the requested export
    count = 0
    pairs = iter_documents(obj, path, ext)
    for count, (document, target) in enumerate(pairs, start=1):
        common.create_dirname(target)
        log.info("exporting to {}...".format(target))

        if ext not in FORMAT_LINES:
            # Formats without a lines generator create the file themselves
            export_file(document, target, ext, **kwargs)
            continue

        lines = export_lines(document, ext, **kwargs)
        common.write_lines(lines, target, end=settings.WRITE_LINESEPERATOR)

    # Nothing was produced: warn and signal it with None
    if not count:
        log.warning("nothing to export")
        return None

    plural = "s" if count > 1 else ""
    log.info("exported to {} file{}".format(count, plural))
    return path
68
69
70
def export_lines(obj, ext=".yml", **kwargs):
    """Yield the lines of an export in the requested format.

    :param obj: Item, list of Items, or Document to export
    :param ext: file extension to specify the output format
    :param kwargs: extra options forwarded to the lines generator

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: lines generator

    """
    # Look up the generator first so unsupported formats fail before logging
    generate = check(ext, get_lines_gen=True)
    message = "yielding {} as lines of {}...".format(obj, ext)
    log.debug(message)
    yield from generate(obj, **kwargs)
84
85
86
def export_file(obj, path, ext=None, **kwargs):
    """Write an export of the object to a file in the requested format.

    :param obj: Item, list of Items, or Document to export
    :param path: output file location with desired extension
    :param ext: file extension to override output path's extension
    :param kwargs: extra options forwarded to the file creator

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats
    :raises: :class:`doorstop.common.DoorstopFileError` if writing fails
        with an ``IOError``

    :return: path to created file

    """
    if not ext:
        ext = os.path.splitext(path)[-1]
    creator = check(ext, get_file_func=True)
    log.debug("converting %s to file format %s...", obj, ext)
    try:
        created = creator(obj, path, **kwargs)
    except IOError:
        # Report the failing path; the low-level traceback is suppressed
        raise common.DoorstopFileError(
            "unable to write to: {}".format(path)
        ) from None
    return created
106
107
108
def _lines_yaml(obj, **_):
    """Yield one YAML text chunk per exported item.

    Each chunk is a single-key mapping from the item's UID (as a string)
    to the item's data, dumped in block style with unicode preserved.

    :param obj: Item, list of Items, or Document to export

    :return: iterator of lines of text

    """
    for item in iter_items(obj):
        yield yaml.dump(
            {str(item.uid): item.data},
            default_flow_style=False,
            allow_unicode=True,
        )
120
121
122
def _tabulate(obj, sep=LIST_SEP, auto=False):
    """Yield lines of header/data for tabular export.

    The first row yielded is the header (``"uid"`` followed by the column
    names); every following row holds one item's UID and its cell values in
    header order.

    :param obj: Item, list of Items, or Document to export
    :param sep: string separating list values when joined in a string
    :param auto: include placeholders for new items on import

    :return: iterator of rows of data

    """
    header = _tabulate_header(obj)
    yield ["uid"] + header

    for item in iter_items(obj):
        data = item.data
        row = [item.uid]
        for key in header:
            row.append(_tabulate_value(item, key, data.get(key), sep))
        yield row

    # Yield placeholders for new items
    if auto:
        for _ in range(settings.PLACEHOLDER_COUNT):
            yield [settings.PLACEHOLDER]


def _tabulate_header(obj):
    """Collect the column names (without ``"uid"``) for a tabular export.

    The standard columns come first, followed by any other attribute names
    in the order they are first met (sorted within each item).

    :param obj: Item, list of Items, or Document to export

    :return: list of column names

    """
    header = ["level", "text", "ref", "links"]

    # 'at_least_one_ref' detects if at least one of the items still has a
    # deprecated 'ref' field.
    at_least_one_ref = False
    for item in iter_items(obj):
        data = item.data
        for name in sorted(data.keys()):
            if name not in header:
                header.append(name)
        if data.get("ref"):
            at_least_one_ref = True

    try:
        reference_index = header.index("references")
    except ValueError:
        # No item has 'references': keep the standard columns untouched
        return header

    # Move 'references' right after the 'ref' header.
    header.insert(3, header.pop(reference_index))

    # 'ref' is only dropped when 'references' is exported and no item still
    # carries a 'ref' value.
    if not at_least_one_ref:
        header.remove("ref")
    return header


def _tabulate_value(item, key, value, sep):
    """Convert one item attribute into its tabular cell value.

    :param item: item the attribute belongs to
    :param key: column/attribute name
    :param value: raw value found under ``key`` in the item's data, or None
    :param sep: string separating link identifiers when joined in a string

    :return: value to place in the row

    """
    if key == "level":
        # some levels are floats for YAML presentation
        return str(value)
    if key == "links":
        # separate identifiers with a delimiter
        return sep.join(uid.string for uid in item.links)
    if key == "references":
        return "" if value is None else _format_references(value)
    if isinstance(value, str) and key not in ("reviewed",):
        # remove sentence boundaries and line wrapping
        return item.get(key)
    if value is None:
        return ""
    return value


def _format_references(references):
    """Join reference entries into one multi-line string.

    Each entry becomes ``type:<type>,path:<path>`` with an optional
    ``,keyword:<keyword>`` suffix; entries are separated by newlines.

    :param references: iterable of mappings with 'type' and 'path' keys and
        an optional 'keyword' key

    :return: string with one reference per line

    """
    ref_strings = []
    for ref_item in references:
        ref_string = "type:{},path:{}".format(ref_item["type"], ref_item["path"])
        if "keyword" in ref_item:
            ref_string += ",keyword:{}".format(ref_item["keyword"])
        ref_strings.append(ref_string)
    return "\n".join(ref_strings)
204
205
206
def _file_csv(obj, path, delimiter=",", auto=False):
    """Write the tabulated export to a delimited text file.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export CSV file
    :param delimiter: character to delimit fields
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    rows = _tabulate(obj, auto=auto)
    options = {"delimiter": delimiter, "newline": "", "encoding": "utf-8"}
    return common.write_csv(rows, path, **options)
224
225
226
def _file_tsv(obj, path, auto=False):
    """Write the tabulated export to a tab-separated file.

    Delegates to the CSV writer with a tab as the field delimiter.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export TSV file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    tab = "\t"
    return _file_csv(obj, path, auto=auto, delimiter=tab)
237
238
239
def _file_xlsx(obj, path, auto=False):
    """Build an XLSX workbook for the object and save it to disk.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export XLSX file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    _get_xlsx(obj, auto).save(path)
    return path
253
254
255
def _get_xlsx(obj, auto):
    """Create an XLSX workbook object.

    The active worksheet receives one row per tabulated row. Every cell is
    top/left aligned with wrapped text, the first row is bold, filtered and
    frozen, and column widths follow the longest line in each column.

    :param obj: Item, list of Items, or Document to export
    :param auto: include placeholders for new items on import

    :return: new workbook

    """
    # Longest line seen so far in each column, keyed by 1-based column index
    col_widths: Dict[Any, float] = defaultdict(float)

    # Create a new workbook
    workbook = openpyxl.Workbook()
    worksheet = workbook.active

    # Style objects are identical for every cell, so build them only once
    alignment = openpyxl.styles.Alignment(
        vertical="top", horizontal="left", wrap_text=True
    )
    header_font = openpyxl.styles.Font(bold=True)

    # Populate cells
    for row, data in enumerate(_tabulate(obj, auto=auto), start=1):
        for col_idx, value in enumerate(data, start=1):
            cell = worksheet.cell(column=col_idx, row=row)

            # wrap text in every cell
            cell.alignment = alignment
            # and bold header rows
            if row == 1:
                cell.font = header_font

            # convert incompatible Excel types:
            # http://pythonhosted.org/openpyxl/api.html#openpyxl.cell.Cell.value
            text = str(value)
            if isinstance(value, (int, float, datetime.datetime)):
                cell.value = value
            else:
                cell.value = text

            # track cell width
            col_widths[col_idx] = max(col_widths[col_idx], _width(text))

    # Add filter up to the last column
    col_letter = openpyxl.utils.get_column_letter(len(col_widths))
    worksheet.auto_filter.ref = "A1:%s1" % col_letter

    # Set column width based on column contents
    for col_idx, content_width in col_widths.items():
        if content_width > XLSX_MAX_WIDTH:
            width = XLSX_MAX_WIDTH
        else:
            # leave room for the filter button next to the text
            width = content_width + XLSX_FILTER_PADDING
        col_letter = openpyxl.utils.get_column_letter(col_idx)
        worksheet.column_dimensions[col_letter].width = width

    # Freeze top row
    worksheet.freeze_panes = worksheet.cell(row=2, column=1)

    return workbook
312
313
314
def _width(text):
315
    """Get the maximum length in a multiline string."""
316
    if text:
317
        return max(len(line) for line in text.splitlines())
318
    else:
319
        return 0
320
321
322
# File extensions whose exporter yields lines of text
FORMAT_LINES = {".yml": _lines_yaml}
# File extensions whose exporter creates the output file itself
FORMAT_FILE = {".csv": _file_csv, ".tsv": _file_tsv, ".xlsx": _file_xlsx}
# Every supported extension: lines-based formats first, then file-based
FORMAT = {**FORMAT_LINES, **FORMAT_FILE}  # type: ignore
328
329
330
def check(ext, get_lines_gen=False, get_file_func=False):
    """Confirm an extension is supported for export.

    With no flag set, the extension only has to be present in ``FORMAT``.
    If a flag is set, the matching exporter is looked up and returned;
    ``get_lines_gen`` takes precedence when both flags are set.

    :param ext: file extension (such as ``".csv"``) to look up
    :param get_lines_gen: return a lines generator if available
    :param get_file_func: return a file creator if available

    :raises: :class:`doorstop.common.DoorstopError` for unknown formats

    :return: function requested if available, else None

    """

    def unknown(kind, options):
        """Build the error reporting an unsupported extension."""
        # The message is formatted in a single pass so that braces inside
        # `ext` stay literal text instead of being parsed as format fields.
        return DoorstopError(
            "unknown {} format: {} (options: {})".format(
                kind, ext or None, ", ".join(options)
            )
        )

    if get_lines_gen:
        try:
            gen = FORMAT_LINES[ext]
        except KeyError:
            raise unknown("lines export", FORMAT_LINES) from None
        else:
            log.debug("found lines generator for: {}".format(ext))
            return gen

    if get_file_func:
        try:
            func = FORMAT_FILE[ext]
        except KeyError:
            raise unknown("file export", FORMAT_FILE) from None
        else:
            log.debug("found file creator for: {}".format(ext))
            return func

    if ext not in FORMAT:
        raise unknown("export", FORMAT)

    return None
371