1
|
|
|
# SPDX-License-Identifier: LGPL-3.0-only |
2
|
|
|
|
3
|
1 |
|
"""Functions to import existing documents and items."""
4
|
1 |
|
|
5
|
1 |
|
import csv |
6
|
1 |
|
import os |
7
|
|
|
import re |
8
|
1 |
|
import warnings |
9
|
|
|
from typing import Any |
10
|
1 |
|
|
11
|
1 |
|
import openpyxl |
12
|
1 |
|
|
13
|
1 |
|
from doorstop import common, settings |
14
|
1 |
|
from doorstop.common import DoorstopError |
15
|
1 |
|
from doorstop.core.builder import _get_tree |
16
|
1 |
|
from doorstop.core.document import Document |
17
|
|
|
from doorstop.core.item import Item |
18
|
|
|
from doorstop.core.types import UID |
19
|
1 |
|
|
20
|
|
|
# Separator pattern for list strings: one or more whitespace, ';' or ','
# characters between the parts.
LIST_SEP_RE = re.compile(r"[\s;,]+")  # regex to split list strings into parts

# Documents that create_document() built even though their parent was
# unavailable in the tree.
_documents = []  # cache of unplaced documents

# Module-level logger named after this module.
log = common.logger(__name__)
25
|
|
|
|
26
|
1 |
|
|
27
|
|
|
def import_file(path, document, ext=None, mapping=None, **kwargs):
    """Import items from an exported file.

    The importer is selected from the file extension (or the explicit
    override) and is called with the path, the document, the mapping and
    any extra keyword arguments.

    :param path: input file location
    :param document: document to import items
    :param ext: file extension to override input path's extension
    :param mapping: dictionary mapping custom to standard attribute names

    :raise DoorstopError: for unknown file formats

    :return: document with imported items

    """
    log.info("importing {} into {}...".format(path, document))
    ext = ext or os.path.splitext(path)[-1]
    func = check(ext)
    func(path, document, mapping=mapping, **kwargs)
    # Hand the document back, as the documented contract promises
    return document
44
|
|
|
|
45
|
1 |
|
|
46
|
|
|
def create_document(prefix, path, parent=None, tree=None):
    """Create a Doorstop document that mirrors an existing document.

    :param prefix: existing document's prefix (for new items)
    :param path: new directory path to store this document's items
    :param parent: parent document's prefix (if one will exist)
    :param tree: explicit tree to add the document

    :return: imported Document

    """
    # Fall back to the implicit tree when no truthy tree was passed in
    tree = tree or _get_tree()

    # First try the regular route: let the tree create the document
    log.info("importing document '{}'...".format(prefix))
    try:
        document = tree.create_document(path, prefix, parent=parent)
    except DoorstopError as error:
        # A failure without a requested parent cannot be worked around
        if not parent:
            raise error from None  # pylint: disable=raising-bad-type

        # The parent is unavailable: build the document directly and
        # remember it as unplaced
        document = Document.new(tree, path, tree.root, prefix, parent=parent)
        log.warning(error)
        _documents.append(document)

    # TODO: attempt to place unplaced documents?

    log.info("imported: {}".format(document))
    return document
77
|
1 |
|
|
78
|
|
|
|
79
|
|
|
def add_item(prefix, uid, attrs=None, document=None, request_next_number=None):
    """Create a Doorstop item from existing item information.

    :param prefix: previously imported document's prefix
    :param uid: existing item's UID
    :param attrs: dictionary of Doorstop and custom attributes
    :param document: explicit document to add the item
    :param request_next_number: server method to get a document's next number

    :return: imported Item

    """
    if not document:
        # No document given: build a tree and look the document up by prefix
        tree = _get_tree(request_next_number=request_next_number)
        document = tree.find_document(prefix)
    else:
        # Use the tree the explicit document already belongs to
        tree = document.tree
        assert tree  # tree should be set internally

    # Create the item under the given UID, then apply its attributes
    log.info("importing item '{}'...".format(uid))
    item = Item.new(tree, document, document.path, document.root, uid, auto=False)
    if attrs:
        for name, value in attrs.items():
            item.set(name, value)
    item.save()

    log.info("imported: {}".format(item))
    return item
109
|
|
|
|
110
|
1 |
|
|
111
|
1 |
|
def _file_yml(path, document, **_):
    """Import items from a YAML export to a document.

    Extra keyword arguments are accepted and ignored.

    :param path: input file location
    :param document: document to import items

    """
    # Read the file and load its YAML content
    log.info("reading items in {}...".format(path))
    data = common.load_yaml(common.read_text(path), path)

    # Replace or create one item per entry
    for uid, attrs in data.items():
        try:
            existing = document.find_item(uid)
        except DoorstopError:
            pass  # no matching item
        else:
            # An item with this UID already exists: drop it before re-adding
            existing.delete()
        add_item(document.prefix, uid, attrs=attrs, document=document)
132
|
|
|
|
133
|
1 |
|
|
134
|
1 |
|
def _file_csv(path, document, delimiter=',', mapping=None):
    """Import items from a CSV export to a document.

    The first row is the header; every following row is one item. Cells
    equal to "true"/"false" (case-insensitive) are converted to booleans.
    An empty file imports nothing.

    :param path: input file location
    :param document: document to import items
    :param delimiter: CSV field delimiter
    :param mapping: dictionary mapping custom to standard attribute names

    """
    rows = []

    # Parse the file
    log.info("reading rows in {}...".format(path))
    # 'utf-8-sig' decodes plain UTF-8 and also skips a leading byte order
    # mark, which would otherwise become part of the first header name
    with open(path, 'r', encoding='utf-8-sig') as stream:
        reader = csv.reader(stream, delimiter=delimiter)
        for _row in reader:
            row = []
            value: Any
            for value in _row:
                # convert string booleans
                if isinstance(value, str):
                    lowered = value.lower()
                    if lowered == 'true':
                        value = True
                    elif lowered == 'false':
                        value = False
                row.append(value)
            rows.append(row)

    # Extract header and data rows; an empty file has neither
    if not rows:
        log.warning("no rows found in {}".format(path))
    header = rows[0] if rows else []
    data = rows[1:]

    # Import items from the rows
    _itemize(header, data, document, mapping=mapping)
168
|
|
|
|
169
|
1 |
|
|
170
|
|
|
def _file_tsv(path, document, mapping=None):
    """Import items from a TSV export to a document.

    Delegates to the CSV importer with a tab as the field delimiter.

    :param path: input file location
    :param document: document to import items
    :param mapping: dictionary mapping custom to standard attribute names

    """
    _file_csv(path, document, mapping=mapping, delimiter='\t')
179
|
|
|
|
180
|
1 |
|
|
181
|
|
|
def _file_xlsx(path, document, mapping=None):
    """Import items from an XLSX export to a document.

    The first row of the active worksheet is the header; every following
    row is one item.

    :param path: input file location
    :param document: document to import items
    :param mapping: dictionary mapping custom to standard attribute names

    """
    header = []
    data = []

    # Open the workbook and pick its active worksheet
    log.debug("reading rows in {}...".format(path))
    workbook = openpyxl.load_workbook(path, data_only=True)
    worksheet = workbook.active

    # Stays 0 when the worksheet yields no rows at all
    index = 0

    # First row feeds the header, all later rows become data rows
    for index, row in enumerate(worksheet.iter_rows()):
        values = [cell.value for cell in row]
        if index == 0:
            header.extend(values)
        else:
            data.append(values)

    # Warn about workbooks that may be sized incorrectly
    if index >= 2 ** 20 - 1:
        msg = "workbook contains the maximum number of rows"
        warnings.warn(msg, Warning)

    # Hand the collected rows to the shared converter
    _itemize(header, data, document, mapping=mapping)
217
|
|
|
|
218
|
|
|
|
219
|
1 |
|
def _itemize(header, data, document, mapping=None):
    """Conversion function for multiple formats.

    Each row is turned into a dictionary of attributes keyed by the
    lower-cased, stripped column name (after applying the mapping). An
    existing item with the same UID is deleted before the new one is added.
    Cells beyond the last header column are ignored.

    :param header: list of columns names
    :param data: list of lists of row values
    :param document: document to import items
    :param mapping: dictionary mapping custom to standard attribute names

    """
    log.info("converting rows to items...")
    log.debug("header: {}".format(header))
    mapping = mapping or {}
    for row in data:
        log.debug("row: {}".format(row))

        # Parse item attributes
        attrs = {}
        uid = None
        # zip() stops at the shorter sequence, so a row that is longer than
        # the header no longer fails with an IndexError
        for name, value in zip(header, row):

            # Key lookup
            key = str(name).lower().strip() if name else ''
            if not key:
                continue

            # Map key to custom attributes names
            for custom, standard in mapping.items():
                if key == custom.lower():
                    msg = "mapped: '{}' => '{}'".format(key, standard)
                    log.debug(msg)
                    key = standard
                    break

            # Convert values for particular keys
            if key in ('uid', 'id'):  # 'id' for backwards compatibility
                uid = value
            elif key == 'links':
                # split links into a list
                attrs[key] = _split_list(value)
            elif key == 'active':
                # require explicit disabling
                attrs['active'] = value is not False
            else:
                attrs[key] = value

        # Get the next UID if the row is a new item
        if attrs.get('text') and uid in (None, '', settings.PLACEHOLDER):
            uid = UID(
                document.prefix, document.sep, document.next_number, document.digits
            )

        # Convert the row to an item
        if uid and uid != settings.PLACEHOLDER:

            # Delete the old item
            try:
                item = document.find_item(uid)
            except DoorstopError:
                log.debug("not yet an item: {}".format(uid))
            else:
                log.debug("deleting old item: {}".format(uid))
                item.delete()

            # Import the item; a failure is logged and the next row continues
            try:
                item = add_item(document.prefix, uid, attrs=attrs, document=document)
            except DoorstopError as exc:
                log.warning(exc)
286
|
|
|
|
287
|
|
|
|
288
|
1 |
|
def _split_list(value):
    """Split a string list into its non-empty parts.

    A falsy value yields an empty list.
    """
    if not value:
        return []
    return [part for part in LIST_SEP_RE.split(value) if part]
294
|
|
|
|
295
|
|
|
|
296
|
|
|
# Mapping from file extension (including the leading dot) to the file
# reader function that imports that format; check() looks extensions up
# here and lists these keys in its error message.
FORMAT_FILE = {
    '.yml': _file_yml,
    '.csv': _file_csv,
    '.tsv': _file_tsv,
    '.xlsx': _file_xlsx,
}
303
|
1 |
|
|
304
|
|
|
|
305
|
|
|
def check(ext):
    """Look up the file importer registered for an extension.

    :param ext: file extension, including the leading dot

    :raise DoorstopError: for unknown formats

    :return: file importer if available

    """
    supported = ', '.join(FORMAT_FILE)
    error = DoorstopError(
        "unknown import format: {} (options: {})".format(ext or None, supported)
    )
    try:
        reader = FORMAT_FILE[ext]
    except KeyError:
        # Hide the KeyError; callers only need the Doorstop-level error
        raise error from None
    log.debug("found file reader for: {}".format(ext))
    return reader
323
|
|
|
|