1 | """Functions to export documents and items.""" |
||
2 | |||
3 | 1 | import os |
|
4 | 1 | import csv |
|
5 | 1 | import datetime |
|
6 | 1 | from collections import defaultdict |
|
7 | |||
8 | 1 | import yaml |
|
9 | 1 | import openpyxl |
|
0 ignored issues
–
show
introduced
by
Loading history...
|
|||
10 | |||
11 | 1 | from doorstop import common |
|
12 | 1 | from doorstop.common import DoorstopError |
|
13 | 1 | from doorstop.core.types import iter_documents, iter_items |
|
14 | 1 | from doorstop import settings |
|
15 | |||
# String separating list values when joined in a string
LIST_SEP = '\n'

# Maximum width for a column
XLSX_MAX_WIDTH = 65
# Column padding to account for the filter button
XLSX_FILTER_PADDING = 3.5

log = common.logger(__name__)
22 | |||
23 | |||
def export(obj, path, ext=None, **kwargs):
    """Export an object to a given format.

    Two calling conventions are supported:

    1. document or item-like object + output file path
    2. tree-like object + output directory path

    :param obj: (1) Item, list of Items, Document or (2) Tree
    :param path: (1) output file path or (2) output directory path
    :param ext: file extension to override output extension

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: output location if files created, else None

    """
    # An explicit extension wins, then the path's own extension, then CSV
    ext = ext or os.path.splitext(path)[-1] or '.csv'
    check(ext)

    # Export each document to its own output location
    exported = 0
    for document, location in iter_documents(obj, path, ext):
        exported += 1

        common.create_dirname(location)
        log.info("exporting to {}...".format(location))
        if ext not in FORMAT_LINES:
            export_file(document, location, ext, **kwargs)
        else:
            lines = export_lines(document, ext, **kwargs)
            common.write_lines(lines, location)

    # Report the outcome; the caller's original path is returned on success
    if not exported:
        log.warning("nothing to export")
        return None

    plural = 's' if exported > 1 else ''
    log.info("exported to {} file{}".format(exported, plural))
    return path
67 | |||
68 | |||
def export_lines(obj, ext='.yml', **kwargs):
    """Yield lines for an export in the specified format.

    Because this is a generator, the extension is only validated once
    iteration starts.

    :param obj: Item, list of Items, or Document to export
    :param ext: file extension to specify the output format

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: lines generator

    """
    make_lines = check(ext, get_lines_gen=True)
    log.debug("yielding {} as lines of {}...".format(obj, ext))
    yield from make_lines(obj, **kwargs)
83 | |||
84 | |||
def export_file(obj, path, ext=None, **kwargs):
    """Create a file object for an export in the specified format.

    :param obj: Item, list of Items, or Document to export
    :param path: output file location with desired extension
    :param ext: file extension to override output path's extension

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: path to created file

    """
    # Fall back to the extension of the output path
    if not ext:
        ext = os.path.splitext(path)[-1]
    creator = check(ext, get_file_func=True)
    log.debug("converting %s to file format %s...", obj, ext)
    try:
        created = creator(obj, path, **kwargs)
    except IOError:
        # Re-raise as a project error, hiding the low-level traceback
        raise common.DoorstopFileError(
            "unable to write to: {}".format(path)) from None
    return created
105 | |||
106 | |||
def _lines_yaml(obj, **_):
    """Yield lines for a YAML export.

    Each yielded string is the YAML dump of a single-entry mapping from
    an item's UID (as a string) to that item's data.

    :param obj: Item, list of Items, or Document to export

    :return: iterator of lines of text

    """
    for item in iter_items(obj):
        yield yaml.dump({str(item.uid): item.data},
                        default_flow_style=False,
                        allow_unicode=True)
120 | |||
121 | |||
def _tabulate(obj, sep=LIST_SEP, auto=False):
    """Yield lines of header/data for tabular export.

    The header row is derived from the first item only: four fixed
    columns followed by that item's remaining data keys in sorted order.

    :param obj: Item, list of Items, or Document to export
    :param sep: string separating list values when joined in a string
    :param auto: include placeholders for new items on import

    :return: iterator of rows of data

    """
    header = None  # column keys; fixed once the first item is seen

    for item in iter_items(obj):
        data = item.data

        # Emit the header row ahead of the first data row
        if header is None:
            header = ['level', 'text', 'ref', 'links']
            for name in sorted(data.keys()):
                if name not in header:
                    header.append(name)
            yield ['uid'] + header

        # Build the data row for this item
        row = [item.uid]
        for key in header:
            value = data.get(key)
            if key == 'level':
                # some levels are floats for YAML presentation
                cell = str(value)
            elif key == 'links':
                # separate identifiers with a delimiter
                cell = sep.join(link.string for link in item.links)
            elif value is None:
                cell = ''
            elif isinstance(value, str) and key != 'reviewed':
                # remove sentence boundaries and line wrapping
                cell = item.get(key)
            else:
                cell = value
            row.append(cell)
        yield row

    if not auto:
        return

    # Placeholder rows for new items
    for _ in range(settings.PLACEHOLDER_COUNT):
        yield [settings.PLACEHOLDER]
169 | |||
170 | |||
def _file_csv(obj, path, delimiter=',', auto=False):
    """Create a CSV file at the given path.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export CSV file
    :param delimiter: character to delimit fields
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    rows = _tabulate(obj, auto=auto)  # lazy; nothing is read until written
    # newline='' leaves line-ending handling to the csv module
    with open(path, 'w', newline='', encoding='utf-8') as stream:
        csv.writer(stream, delimiter=delimiter).writerows(rows)
    return path
187 | |||
188 | |||
def _file_tsv(obj, path, auto=False):
    """Create a TSV file at the given path.

    Delegates to the CSV writer with a tab as the field delimiter.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export TSV file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    tab = '\t'
    return _file_csv(obj, path, auto=auto, delimiter=tab)
200 | |||
201 | |||
def _file_xlsx(obj, path, auto=False):
    """Create an XLSX file at the given path.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export XLSX file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    _get_xlsx(obj, auto).save(path)
    return path
216 | |||
217 | |||
def _get_xlsx(obj, auto):
    """Create an XLSX workbook object.

    The active worksheet is filled from the tabulated rows, with wrapped,
    top-left aligned cells, a bold first row, an auto-filter across the
    first row, content-based column widths and a frozen top row.

    :param obj: Item, list of Items, or Document to export
    :param auto: include placeholders for new items on import

    :return: new workbook

    """
    col_widths = defaultdict(int)
    # Index of the right-most column written by ANY row. Rows can have
    # different lengths (placeholder rows hold a single cell), so the last
    # row processed does not tell us how wide the table is.
    last_col_idx = 1

    # Create a new workbook
    workbook = openpyxl.Workbook()
    worksheet = workbook.active

    # Populate cells
    for row, data in enumerate(_tabulate(obj, auto=auto), start=1):
        for col_idx, value in enumerate(data, start=1):
            col = openpyxl.cell.get_column_letter(col_idx)
            cell = worksheet.cell('%s%s' % (col, row))

            # wrap text in every cell
            alignment = openpyxl.styles.Alignment(vertical='top',
                                                  horizontal='left',
                                                  wrap_text=True)
            style = cell.style.copy(alignment=alignment)
            # and bold header rows
            if row == 1:
                style = style.copy(font=openpyxl.styles.Font(bold=True))
            cell.style = style

            # convert incompatible Excel types:
            # http://pythonhosted.org/openpyxl/api.html#openpyxl.cell.Cell.value
            if not isinstance(value, (int, float, str, datetime.datetime)):
                value = str(value)
            cell.value = value

            # track cell width and the widest row
            col_widths[col] = max(col_widths[col], _width(str(value)))
            last_col_idx = max(last_col_idx, col_idx)

    # Add filter up to the last column. This uses the widest row rather
    # than the loop's leftover column letter, which would be 'A' whenever
    # the final rows are single-cell placeholders.
    last_col = openpyxl.cell.get_column_letter(last_col_idx)
    worksheet.auto_filter.ref = "A1:%s1" % last_col

    # Set column width based on column contents
    for col in col_widths:
        if col_widths[col] > XLSX_MAX_WIDTH:
            width = XLSX_MAX_WIDTH
        else:
            width = col_widths[col] + XLSX_FILTER_PADDING
        worksheet.column_dimensions[col].width = width

    # Freeze top row
    worksheet.freeze_panes = worksheet.cell('A2')

    return workbook
274 | |||
275 | |||
276 | 1 | def _width(text): |
|
277 | """Get the maximum length in a multiline string.""" |
||
278 | 1 | if text: |
|
279 | 1 | return max(len(line) for line in text.splitlines()) |
|
280 | else: |
||
281 | 1 | return 0 |
|
282 | |||
283 | |||
# Line-based exporters, keyed by file extension
FORMAT_LINES = {'.yml': _lines_yaml}
# File-based exporters, keyed by file extension
FORMAT_FILE = {
    '.csv': _file_csv,
    '.tsv': _file_tsv,
    '.xlsx': _file_xlsx,
}
# Every supported extension: line-based entries first, then file-based
FORMAT = dict(FORMAT_LINES)
FORMAT.update(FORMAT_FILE)
292 | |||
293 | |||
def check(ext, get_lines_gen=False, get_file_func=False):
    """Confirm an extension is supported for export.

    :param ext: file extension to look up in the format mappings
    :param get_lines_gen: return a lines generator if available
    :param get_file_func: return a file creator if available

    :raises: :class:`doorstop.common.DoorstopError` for unknown formats

    :return: function requested if available, else None

    """
    def unknown(kind, formats):
        """Build the error for an extension missing from `formats`."""
        # Format in a single pass: the extension is inserted as a value,
        # so braces inside it are never treated as replacement fields.
        return DoorstopError("unknown {} format: {} (options: {})".format(
            kind, ext or None, ', '.join(formats)))

    if get_lines_gen:
        try:
            gen = FORMAT_LINES[ext]
        except KeyError:
            raise unknown("lines export", FORMAT_LINES) from None
        else:
            log.debug("found lines generator for: {}".format(ext))
            return gen

    if get_file_func:
        try:
            func = FORMAT_FILE[ext]
        except KeyError:
            raise unknown("file export", FORMAT_FILE) from None
        else:
            log.debug("found file creator for: {}".format(ext))
            return func

    # No specific function requested: only validate the extension
    if ext not in FORMAT:
        raise unknown("export", FORMAT)

    return None
335 |