Completed
Push — develop ( c55321...0353d4 )
by Jace
05:33
created

doorstop/core/exporter.py (1 issue)

1
"""Functions to export documents and items."""
2
3 1
import os
4 1
import csv
5 1
import datetime
6 1
from collections import defaultdict
7
8 1
import yaml
9 1
import openpyxl
0 ignored issues
show
third party import "import openpyxl" should be placed before "import yaml"
Loading history...
10
11 1
from doorstop import common
12 1
from doorstop.common import DoorstopError
13 1
from doorstop.core.types import iter_documents, iter_items
14 1
from doorstop import settings
15
16 1
# Tunable constants used by the tabular and XLSX exporters below.
LIST_SEP = '\n'  # string separating list values when joined in a string

XLSX_MAX_WIDTH = 65  # maximum width for a column
XLSX_FILTER_PADDING = 3.5  # column padding to account for filter button
20
21 1
# Module-level logger, named after this module, used by every exporter here.
log = common.logger(__name__)
22
23
24 1
def export(obj, path, ext=None, **kwargs):
    """Export an object to a given format.

    The function can be called in two ways:

    1. document or item-like object + output file path
    2. tree-like object + output directory path

    :param obj: (1) Item, list of Items, Document or (2) Tree
    :param path: (1) output file path or (2) output directory path
    :param ext: file extension to override output extension; when omitted
        the extension of `path` is used, falling back to ``'.csv'`` if the
        path has none
    :param kwargs: extra options forwarded unchanged to the format-specific
        exporter

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: output location if files created, else None

    """
    # Determine the output format (and fail early if it is unsupported)
    ext = ext or os.path.splitext(path)[-1] or '.csv'
    check(ext)

    # Export documents
    count = 0
    for obj2, path2 in iter_documents(obj, path, ext):
        count += 1

        # Export content to the specified path
        common.create_dirname(path2)
        log.info("exporting to {}...".format(path2))
        if ext in FORMAT_LINES:
            # line-based formats are generated lazily and written as text
            lines = export_lines(obj2, ext, **kwargs)
            common.write_lines(lines, path2)
        else:
            # file-based formats write their own output at the given path
            export_file(obj2, path2, ext, **kwargs)

    # Return the exported path
    if count:
        msg = "exported to {} file{}".format(count, 's' if count > 1 else '')
        log.info(msg)
        return path
    else:
        log.warning("nothing to export")
        return None
67
68
69 1
def export_lines(obj, ext='.yml', **kwargs):
    """Yield lines for an export in the specified format.

    This is a generator function, so nothing (including the format check)
    runs until the returned generator is first iterated.

    :param obj: Item, list of Items, or Document to export
    :param ext: file extension to specify the output format
    :param kwargs: extra options forwarded to the lines generator

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats
        (raised on first iteration, not at call time)

    :return: lines generator

    """
    gen = check(ext, get_lines_gen=True)
    log.debug("yielding {} as lines of {}...".format(obj, ext))
    yield from gen(obj, **kwargs)
83
84
85 1
def export_file(obj, path, ext=None, **kwargs):
    """Create a file object for an export in the specified format.

    :param obj: Item, list of Items, or Document to export
    :param path: output file location with desired extension
    :param ext: file extension to override output path's extension
    :param kwargs: extra options forwarded to the file creator

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats
    :raises: :class:`doorstop.common.DoorstopFileError` if the file creator
        fails with an :class:`IOError`

    :return: path to created file

    """
    ext = ext or os.path.splitext(path)[-1]
    func = check(ext, get_file_func=True)
    log.debug("converting %s to file format %s...", obj, ext)
    try:
        return func(obj, path, **kwargs)
    except IOError:
        # replace the low-level error with a domain error naming the path
        msg = "unable to write to: {}".format(path)
        raise common.DoorstopFileError(msg) from None
105
106
107 1
def _lines_yaml(obj, **_):
    """Yield lines for a YAML export.

    Each item is dumped as its own single-key mapping of UID to item data.
    Extra keyword arguments are accepted and ignored.

    :param obj: Item, list of Items, or Document to export

    :return: iterator of lines of text

    """
    for item in iter_items(obj):

        data = {str(item.uid): item.data}
        text = yaml.dump(data, default_flow_style=False, allow_unicode=True)
        yield text
120
121
122 1
def _tabulate(obj, sep=LIST_SEP, auto=False):
    """Yield lines of header/data for tabular export.

    The header row is derived from the first item only: the fixed columns
    ``uid, level, text, ref, links`` followed by that item's remaining data
    keys in sorted order. Every later row is rendered against this header.

    :param obj: Item, list of Items, or Document to export
    :param sep: string separating list values when joined in a string
    :param auto: include placeholders for new items on import

    :return: iterator of rows of data

    """
    yield_header = True

    for item in iter_items(obj):

        data = item.data

        # Yield header (once, before the first data row)
        if yield_header:
            header = ['level', 'text', 'ref', 'links']
            for value in sorted(data.keys()):
                if value not in header:
                    header.append(value)
            yield ['uid'] + header
            yield_header = False

        # Yield row
        row = [item.uid]
        for key in header:
            value = data.get(key)
            if key == 'level':
                # some levels are floats for YAML presentation
                value = str(value)
            elif key == 'links':
                # separate identifiers with a delimiter
                value = sep.join(uid.string for uid in item.links)
            elif isinstance(value, str) and key not in ('reviewed',):
                # remove sentence boundaries and line wrapping
                value = item.get(key)
            elif value is None:
                value = ''
            row.append(value)
        yield row

    # Yield placeholders for new items (single-cell rows)
    if auto:
        for _ in range(settings.PLACEHOLDER_COUNT):
            yield [settings.PLACEHOLDER]
169
170
171 1
def _file_csv(obj, path, delimiter=',', auto=False):
    """Create a CSV file at the given path.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export CSV file
    :param delimiter: character to delimit fields
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    # newline='' lets the csv writer control line endings itself
    with open(path, 'w', newline='', encoding='utf-8') as stream:
        writer = csv.writer(stream, delimiter=delimiter)
        for row in _tabulate(obj, auto=auto):
            writer.writerow(row)
    return path
187
188
189 1
def _file_tsv(obj, path, auto=False):
    """Create a TSV file at the given path.

    Delegates to the CSV writer with a tab as the field delimiter.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export TSV file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    return _file_csv(obj, path, delimiter='\t', auto=auto)
200
201
202 1
def _file_xlsx(obj, path, auto=False):
    """Create an XLSX file at the given path.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export XLSX file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    workbook = _get_xlsx(obj, auto)
    workbook.save(path)

    return path
216
217
218 1
def _get_xlsx(obj, auto):
    """Create an XLSX workbook object.

    Every cell is top/left aligned with wrapped text, the first row is bold,
    an auto-filter spans the header row, column widths follow their content
    and the top row is frozen.

    :param obj: Item, list of Items, or Document to export
    :param auto: include placeholders for new items on import

    :return: new workbook

    """
    # NOTE(review): the openpyxl calls below (`openpyxl.cell.get_column_letter`,
    # string coordinates for `worksheet.cell`, `style.copy`) are kept exactly
    # as the original used them; confirm against the pinned openpyxl version.
    col_widths = defaultdict(int)
    # Right-most column seen in any row; 'A' is the fallback for no rows.
    last_col = 'A'
    last_col_idx = 0

    # Styles are identical for every cell, so build them once
    alignment = openpyxl.styles.Alignment(vertical='top',
                                          horizontal='left',
                                          wrap_text=True)
    bold = openpyxl.styles.Font(bold=True)

    # Create a new workbook
    workbook = openpyxl.Workbook()
    worksheet = workbook.active

    # Populate cells
    for row, data in enumerate(_tabulate(obj, auto=auto), start=1):
        for col_idx, value in enumerate(data, start=1):
            col = openpyxl.cell.get_column_letter(col_idx)
            cell = worksheet.cell('%s%s' % (col, row))

            # wrap text in every cell
            style = cell.style.copy(alignment=alignment)
            # and bold header rows
            if row == 1:
                style = style.copy(font=bold)
            cell.style = style

            # convert incompatible Excel types:
            # http://pythonhosted.org/openpyxl/api.html#openpyxl.cell.Cell.value
            if not isinstance(value, (int, float, str, datetime.datetime)):
                value = str(value)
            cell.value = value

            # track cell width
            col_widths[col] = max(col_widths[col], _width(str(value)))

            # Track the widest row: trailing placeholder rows have a single
            # cell and must not shrink the filter range.
            if col_idx > last_col_idx:
                last_col_idx = col_idx
                last_col = col

    # Add filter up to the last column
    worksheet.auto_filter.ref = "A1:%s1" % last_col

    # Set column width based on column contents
    for col, measured in col_widths.items():
        if measured > XLSX_MAX_WIDTH:
            width = XLSX_MAX_WIDTH
        else:
            width = measured + XLSX_FILTER_PADDING
        worksheet.column_dimensions[col].width = width

    # Freeze top row
    worksheet.freeze_panes = worksheet.cell('A2')

    return workbook
274
275
276 1
def _width(text):
277
    """Get the maximum length in a multiline string."""
278 1
    if text:
279 1
        return max(len(line) for line in text.splitlines())
280
    else:
281 1
        return 0
282
283
284
# Mapping from file extension to lines generator
FORMAT_LINES = {'.yml': _lines_yaml}
# Mapping from file extension to file generator
FORMAT_FILE = {'.csv': _file_csv,
               '.tsv': _file_tsv,
               '.xlsx': _file_xlsx}
# Union of format dictionaries
FORMAT = dict(list(FORMAT_LINES.items()) + list(FORMAT_FILE.items()))
292
293
294 1
def check(ext, get_lines_gen=False, get_file_func=False):
    """Confirm an extension is supported for export.

    :param ext: file extension (including the leading dot) to look up
    :param get_lines_gen: return a lines generator if available
    :param get_file_func: return a file creator if available

    :raises: :class:`doorstop.common.DoorstopError` for unknown formats

    :return: function requested if available, else None

    """
    def unknown(kind, formats):
        """Build the error for an unsupported extension of the given kind."""
        # Formatted in a single pass so that braces inside `ext` are treated
        # as literal text rather than as format fields.
        options = ', '.join(formats)
        return DoorstopError("unknown {} format: {} (options: {})".format(
            kind, ext or None, options))

    if get_lines_gen:
        try:
            gen = FORMAT_LINES[ext]
        except KeyError:
            raise unknown("lines export", FORMAT_LINES) from None
        else:
            log.debug("found lines generator for: {}".format(ext))
            return gen

    if get_file_func:
        try:
            func = FORMAT_FILE[ext]
        except KeyError:
            raise unknown("file export", FORMAT_FILE) from None
        else:
            log.debug("found file creator for: {}".format(ext))
            return func

    # No specific function requested: only validate the extension
    if ext not in FORMAT:
        raise unknown("export", FORMAT)

    return None
335