doorstop.core.importer._itemize() - Code Metrics - jacebrowning/doorstop - Measure and Improve Code Quality continuously with Scrutinizer

doorstop.core.importer._itemize() F
last analyzed 2019-08-12 00:16 UTC

↳ Parent: doorstop.core.importer

Complexity

Conditions

Size

Total Lines	67
Code Lines	38

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	37
CRAP Score	17

Importance

Changes

Metric	Value
eloc	38
dl	0
loc	67
ccs	37
cts	37
cp	1
rs	1.8
c	0
b	0
f	0
cc	17
nop	4
crap	17

How to fix Long Method Complexity

# SPDX-License-Identifier: LGPL-3.0-only

"""Functions to import exiting documents and items."""

import csv
import os
import re
import warnings
from typing import Any

import openpyxl

from doorstop import common, settings
from doorstop.common import DoorstopError
from doorstop.core.builder import _get_tree
from doorstop.core.document import Document
from doorstop.core.item import Item
from doorstop.core.types import UID

# Separator between the entries of a delimited list string: any run of
# whitespace, semicolons, and/or commas.
LIST_SEP_RE = re.compile(r"[\s;,]+")  # regex to split list strings into parts

# Documents that `create_document` built even though their parent document
# could not be found in the tree.
_documents = []  # cache of unplaced documents

# Module-level logger used by every importer function below.
log = common.logger(__name__)


def import_file(path, document, ext=None, mapping=None, **kwargs):
    """Import items from an exported file.

    The reader is selected by file extension (see `check`) and is called
    with the path, the document, the mapping, and any extra keyword
    arguments.

    :param path: input file location
    :param document: document to import items
    :param ext: file extension to override input path's extension
    :param mapping: dictionary mapping custom to standard attribute names
    :param kwargs: additional keyword arguments forwarded to the file reader

    :raise DoorstopError: for unknown file formats

    :return: document with imported items

    """
    log.info("importing {} into {}...".format(path, document))
    # Fall back to the extension of the path when none is given explicitly
    ext = ext or os.path.splitext(path)[-1]
    func = check(ext)
    func(path, document, mapping=mapping, **kwargs)
    # The documented contract is to hand back the document that was filled
    return document


def create_document(prefix, path, parent=None, tree=None):
    """Import an existing document into a Doorstop tree.

    When the tree refuses to create the document and a parent prefix was
    given, the document is still created and is remembered in the module's
    cache of unplaced documents. Without a parent prefix the error is
    re-raised.

    :param prefix: existing document's prefix (for new items)
    :param path: new directory path to store this document's items
    :param parent: parent document's prefix (if one will exist)
    :param tree: explicit tree to add the document

    :raise DoorstopError: if the document cannot be created and no parent
        prefix was specified

    :return: imported Document

    """
    # Build an implicit tree when the caller did not supply a usable one
    tree = tree or _get_tree()

    log.info("importing document '{}'...".format(prefix))
    try:
        # First choice: let the tree create and place the document
        document = tree.create_document(path, prefix, parent=parent)
    except DoorstopError as exc:
        if parent:
            # Create the document anyway and keep track of it as unplaced
            document = Document.new(tree, path, tree.root, prefix, parent=parent)
            log.warning(exc)
            _documents.append(document)
        else:
            raise exc from None  # pylint: disable=raising-bad-type

    # TODO: attempt to place unplaced documents?

    log.info("imported: {}".format(document))
    return document


def add_item(prefix, uid, attrs=None, document=None, request_next_number=None):
    """Create a Doorstop item from existing item information.

    :param prefix: previously imported document's prefix
    :param uid: existing item's UID
    :param attrs: dictionary of Doorstop and custom attributes
    :param document: explicit document to add the item
    :param request_next_number: server method to get a document's next number

    :return: imported Item

    """
    if not document:
        # No explicit document: build a tree and look the document up in it
        tree = _get_tree(request_next_number=request_next_number)
        document = tree.find_document(prefix)
    else:
        # Explicit document: reuse the tree it already belongs to
        tree = document.tree
        assert tree  # tree should be set internally

    # Create the item under the given UID, then copy the attributes onto it
    log.info("importing item '{}'...".format(uid))
    item = Item.new(tree, document, document.path, document.root, uid, auto=False)
    if attrs:
        for name, value in attrs.items():
            item.set(name, value)
    item.save()

    log.info("imported: {}".format(item))
    return item


def _file_yml(path, document, **_):
    """Import items from a YAML export to a document.

    Each top-level key of the YAML data is used as an item UID and its value
    as that item's attributes. An item that already exists in the document
    under the same UID is deleted before the new one is added.

    :param path: input file location
    :param document: document to import items

    """
    log.info("reading items in {}...".format(path))
    # Read the file and parse it as YAML in one step
    data = common.load_yaml(common.read_text(path), path)

    for uid, attrs in data.items():
        # Remove any item already stored under this UID
        try:
            stale = document.find_item(uid)
        except DoorstopError:
            pass  # no matching item
        else:
            stale.delete()
        add_item(document.prefix, uid, attrs=attrs, document=document)


def _file_csv(path, document, delimiter=',', mapping=None):
    """Import items from a CSV export to a document.

    The first row is used as the header and every following row as item
    data. Cells equal to 'true' or 'false' (ignoring case) are converted to
    booleans; all other cells are kept as strings. A file without any rows
    imports nothing, just like an empty XLSX worksheet.

    :param path: input file location
    :param document: document to import items
    :param delimiter: CSV field delimiter
    :param mapping: dictionary mapping custom to standard attribute names

    """
    booleans = {'true': True, 'false': False}

    # Parse the file
    log.info("reading rows in {}...".format(path))
    with open(path, 'r', encoding='utf-8') as stream:
        reader = csv.reader(stream, delimiter=delimiter)
        # convert string booleans, leave every other cell untouched
        rows = [[booleans.get(cell.lower(), cell) for cell in line] for line in reader]

    # Extract header and data rows (an empty file has neither)
    if not rows:
        log.warning("no rows found in {}".format(path))
    header = rows[0] if rows else []
    data = rows[1:]

    # Import items from the rows
    _itemize(header, data, document, mapping=mapping)


def _file_tsv(path, document, mapping=None):
    """Import items from a TSV export to a document.

    A TSV file is read as a CSV file whose delimiter is a tab character.

    :param path: input file location
    :param document: document to import items
    :param mapping: dictionary mapping custom to standard attribute names

    """
    tab = '\t'
    _file_csv(path, document, mapping=mapping, delimiter=tab)


def _file_xlsx(path, document, mapping=None):
    """Import items from an XLSX export to a document.

    The first row of the active worksheet is used as the header and every
    following row as item data.

    :param path: input file location
    :param document: document to import items
    :param mapping: dictionary mapping custom to standard attribute names

    """
    # Open the workbook and pick its active sheet
    log.debug("reading rows in {}...".format(path))
    workbook = openpyxl.load_workbook(path, data_only=True)
    worksheet = workbook.active

    header = []
    data = []
    index = 0  # stays 0 when the worksheet yields no rows at all

    # The first row holds the column names, all later rows hold item values
    for index, cells in enumerate(worksheet.iter_rows()):
        values = [cell.value for cell in cells]
        if index == 0:
            header.extend(values)
        else:
            data.append(values)

    # A last zero-based index of 2 ** 20 - 1 means at least 2 ** 20 rows were
    # read, so the workbook may be sized incorrectly
    if index >= 2 ** 20 - 1:
        msg = "workbook contains the maximum number of rows"
        warnings.warn(msg, Warning)

    # Turn the collected rows into items
    _itemize(header, data, document, mapping=mapping)


def _itemize(header, data, document, mapping=None):
    """Conversion function for multiple formats.

    Every row is converted to a UID plus a dictionary of attributes. Rows
    that have text but no usable UID get the document's next UID. Rows that
    end up without a UID (or with the placeholder UID) are skipped. An item
    already stored under a row's UID is deleted before the row is imported.

    Rows may be shorter or longer than the header: missing cells are simply
    absent from the item and surplus cells are ignored.

    :param header: list of columns names
    :param data: list of lists of row values
    :param document: document to import items
    :param mapping: dictionary mapping custom to standard attribute names

    """
    log.info("converting rows to items...")
    log.debug("header: {}".format(header))

    # Resolve the column names once instead of once per cell
    columns = _header_columns(header, mapping)

    for row in data:
        log.debug("row: {}".format(row))

        # Parse item attributes
        uid, attrs = _parse_row(columns, row)

        # Get the next UID if the row is a new item
        if attrs.get('text') and uid in (None, '', settings.PLACEHOLDER):
            uid = UID(
                document.prefix, document.sep, document.next_number, document.digits
            )

        # Convert the row to an item
        if uid and uid != settings.PLACEHOLDER:
            _replace_item(document, uid, attrs)


def _header_columns(header, mapping=None):
    """Return (index, key) pairs for the header columns that have a name.

    :param header: list of columns names
    :param mapping: dictionary mapping custom to standard attribute names

    :return: list of (column index, attribute key) tuples

    """
    # Lower-case the custom names once; the first entry wins on duplicates,
    # matching a first-match scan over the mapping
    lookup = {}
    for custom, standard in (mapping or {}).items():
        lookup.setdefault(custom.lower(), standard)

    columns = []
    for index, name in enumerate(header):
        key = str(name).lower().strip() if name else ''
        if not key:
            continue  # unnamed columns are never imported
        if key in lookup:
            log.debug("mapped: '{}' => '{}'".format(key, lookup[key]))
            key = lookup[key]
        columns.append((index, key))
    return columns


def _parse_row(columns, row):
    """Split one row of values into a UID and a dictionary of attributes.

    :param columns: (column index, attribute key) tuples to read
    :param row: list of row values

    :return: tuple of the row's UID (None if absent) and its attributes

    """
    uid = None
    attrs = {}
    for index, key in columns:
        if index >= len(row):
            break  # short row: the remaining columns have no value
        value = row[index]

        # Convert values for particular keys
        if key in ('uid', 'id'):  # 'id' for backwards compatibility
            uid = value
        elif key == 'links':
            # split links into a list
            attrs[key] = _split_list(value)
        elif key == 'active':
            # require explicit disabling
            attrs['active'] = value is not False
        else:
            attrs[key] = value
    return uid, attrs


def _replace_item(document, uid, attrs):
    """Delete any item stored under the UID, then import the new item.

    A DoorstopError raised while adding the item is logged as a warning
    instead of being propagated, so one bad row does not stop the import.

    :param document: document to import the item
    :param uid: UID of the item to replace
    :param attrs: dictionary of Doorstop and custom attributes

    """
    # Delete the old item
    try:
        item = document.find_item(uid)
    except DoorstopError:
        log.debug("not yet an item: {}".format(uid))
    else:
        log.debug("deleting old item: {}".format(uid))
        item.delete()

    # Import the item
    try:
        add_item(document.prefix, uid, attrs=attrs, document=document)
    except DoorstopError as exc:
        log.warning(exc)


def _split_list(value):
    """Return the non-empty parts of a delimited string as a list.

    The string is split with `LIST_SEP_RE`; a falsy value gives an empty
    list.

    """
    if not value:
        return []
    # Splitting can produce empty strings at either end; drop them
    return list(filter(None, LIST_SEP_RE.split(value)))


# Mapping from file extension to file reader
# Keys include the leading dot, the form `os.path.splitext` produces in
# `import_file`; `check` looks extensions up in this table.
FORMAT_FILE = {
    '.yml': _file_yml,
    '.csv': _file_csv,
    '.tsv': _file_tsv,
    '.xlsx': _file_xlsx,
}


def check(ext):
    """Look up the file reader registered for an extension.

    :param ext: file extension, including the leading dot

    :raise DoorstopError: for unknown formats

    :return: file importer if available

    """
    try:
        reader = FORMAT_FILE[ext]
    except KeyError:
        # List every supported extension so the caller can correct the input
        options = ', '.join(FORMAT_FILE)
        msg = "unknown import format: {} (options: {})".format(ext or None, options)
        raise DoorstopError(msg) from None

    log.debug("found file reader for: {}".format(ext))
    return reader


1		# SPDX-License-Identifier: LGPL-3.0-only
2
3	1	"""Functions to import exiting documents and items."""
4	1
5	1	import csv
6	1	import os
7		import re
8	1	import warnings
9		from typing import Any
10	1
11	1	import openpyxl
12	1
13	1	from doorstop import common, settings
14	1	from doorstop.common import DoorstopError
15	1	from doorstop.core.builder import _get_tree
16	1	from doorstop.core.document import Document
17		from doorstop.core.item import Item
18		from doorstop.core.types import UID
19	1
20		LIST_SEP_RE = re.compile(r"[\s;,]+") # regex to split list strings into parts
21	1
22		_documents = [] # cache of unplaced documents
23	1
24		log = common.logger(__name__)
25
26	1
27		def import_file(path, document, ext=None, mapping=None, **kwargs):
28		"""Import items from an exported file.
29
30		:param path: input file location
31		:param document: document to import items
32		:param ext: file extension to override input path's extension
33		:param mapping: dictionary mapping custom to standard attribute names
34
35		:raise DoorstopError: for unknown file formats
36
37		:return: document with imported items
38
39	1	"""
40	1	log.info("importing {} into {}...".format(path, document))
41	1	ext = ext or os.path.splitext(path)[-1]
42	1	func = check(ext)
43		func(path, document, mapping=mapping, **kwargs)
44
45	1
46		def create_document(prefix, path, parent=None, tree=None):
47		"""Create a Doorstop document from existing document information.
48
49		:param prefix: existing document's prefix (for new items)
50		:param path: new directory path to store this document's items
51		:param parent: parent document's prefix (if one will exist)
52		:param tree: explicit tree to add the document
53
54		:return: imported Document
55
56	1	"""
57	1	if not tree:
58		tree = _get_tree()
59
60	1	# Attempt to create a document with the given parent
61	1	log.info("importing document '{}'...".format(prefix))
62	1	try:
63	1	document = tree.create_document(path, prefix, parent=parent)
64	1	except DoorstopError as exc:
65	1	if not parent:
66		raise exc from None # pylint: disable=raising-bad-type
67
68	1	# Create the document despite an unavailable parent
69		document = Document.new(tree, path, tree.root, prefix, parent=parent)
70		log.warning(exc)
71	1	_documents.append(document)
72	1
73		# TODO: attempt to place unplaced documents?
74
75		log.info("imported: {}".format(document))
76	1	return document
77	1
78
79		def add_item(prefix, uid, attrs=None, document=None, request_next_number=None):
80	1	"""Create a Doorstop document from existing document information.
81
82		:param prefix: previously imported document's prefix
83		:param uid: existing item's UID
84		:param attrs: dictionary of Doorstop and custom attributes
85		:param document: explicit document to add the item
86		:param request_next_number: server method to get a document's next number
87
88		:return: imported Item
89
90		"""
91		if document:
92	1	# Get an explicit tree
93		tree = document.tree
94	1	assert tree # tree should be set internally
95	1	else:
96		# Get an implicit tree and document
97		tree = _get_tree(request_next_number=request_next_number)
98	1	document = tree.find_document(prefix)
99	1
100		# Add an item using the specified UID
101		log.info("importing item '{}'...".format(uid))
102	1	item = Item.new(tree, document, document.path, document.root, uid, auto=False)
103	1	for key, value in (attrs or {}).items():
104		item.set(key, value)
105		item.save()
106	1
107	1	log.info("imported: {}".format(item))
108	1	return item
109
110	1
111	1	def _file_yml(path, document, **_):
112		"""Import items from a YAML export to a document.
113
114	1	:param path: input file location
115		:param document: document to import items
116
117		"""
118		# Parse the file
119		log.info("reading items in {}...".format(path))
120		text = common.read_text(path)
121		# Load the YAML data
122	1	data = common.load_yaml(text, path)
123	1	# Add items
124		for uid, attrs in data.items():
125	1	try:
126		item = document.find_item(uid)
127	1	except DoorstopError:
128	1	pass # no matching item
129	1	else:
130	1	item.delete()
131	1	add_item(document.prefix, uid, attrs=attrs, document=document)
132
133	1
134	1	def _file_csv(path, document, delimiter=',', mapping=None):
135		"""Import items from a CSV export to a document.
136
137	1	:param path: input file location
138		:param document: document to import items
139		:param delimiter: CSV field delimiter
140		:param mapping: dictionary mapping custom to standard attribute names
141
142		"""
143		rows = []
144
145		# Parse the file
146	1	log.info("reading rows in {}...".format(path))
147		with open(path, 'r', encoding='utf-8') as stream:
148		reader = csv.reader(stream, delimiter=delimiter)
149	1	for _row in reader:
150	1	row = []
151	1	value: Any
152	1	for value in _row:
153	1	# convert string booleans
154	1	if isinstance(value, str):
155		if value.lower() == 'true':
156	1	value = True
157	1	elif value.lower() == 'false':
158	1	value = False
159	1	row.append(value)
160	1	rows.append(row)
161	1
162	1	# Extract header and data rows
163		header = rows[0]
164		data = rows[1:]
165	1
166	1	# Import items from the rows
167		_itemize(header, data, document, mapping=mapping)
168
169	1
170		def _file_tsv(path, document, mapping=None):
171		"""Import items from a TSV export to a document.
172	1
173		:param path: input file location
174		:param document: document to import items
175		:param mapping: dictionary mapping custom to standard attribute names
176
177		"""
178		_file_csv(path, document, delimiter='\t', mapping=mapping)
179
180	1
181		def _file_xlsx(path, document, mapping=None):
182		"""Import items from an XLSX export to a document.
183	1
184		:param path: input file location
185		:param document: document to import items
186		:param mapping: dictionary mapping custom to standard attribute names
187
188		"""
189		header = []
190		data = []
191	1
192	1	# Parse the file
193		log.debug("reading rows in {}...".format(path))
194		workbook = openpyxl.load_workbook(path, data_only=True)
195	1	worksheet = workbook.active
196	1
197	1	index = 0
198
199		# Extract header and data rows
200	1	for index, row in enumerate(worksheet.iter_rows()):
201	1	row2 = []
202	1	for cell in row:
203	1	if index == 0:
204	1	header.append(cell.value)
205		else:
206	1	row2.append(cell.value)
207	1	if index:
208	1	data.append(row2)
209
210		# Warn about workbooks that may be sized incorrectly
211		if index >= 2 ** 20 - 1:
212		msg = "workbook contains the maximum number of rows"
213		warnings.warn(msg, Warning)
214
215		# Import items from the rows
216	1	_itemize(header, data, document, mapping=mapping)
217
218
219	1	def _itemize(header, data, document, mapping=None):
220		"""Conversion function for multiple formats.
221
222		:param header: list of columns names
223		:param data: list of lists of row values
224		:param document: document to import items
225		:param mapping: dictionary mapping custom to standard attribute names
226
227		"""
228	1	log.info("converting rows to items...")
229	1	log.debug("header: {}".format(header))
230	1	for row in data:
231	1	log.debug("row: {}".format(row))
232
233		# Parse item attributes
234	1	attrs = {}
235	1	uid = None
236	1	for index, value in enumerate(row):
237
238		# Key lookup
239	1	key = str(header[index]).lower().strip() if header[index] else ''
240	1	if not key:
241	1	continue
242
243		# Map key to custom attributes names
244	1	for custom, standard in (mapping or {}).items():
245	1	if key == custom.lower():
246	1	msg = "mapped: '{}' => '{}'".format(key, standard)
247	1	log.debug(msg)
248	1	key = standard
249	1	break
250
251		# Convert values for particular keys
252	1	if key in ('uid', 'id'): # 'id' for backwards compatibility
253	1	uid = value
254	1	elif key == 'links':
255		# split links into a list
256	1	attrs[key] = _split_list(value)
257	1	elif key == 'active':
258		# require explicit disabling
259	1	attrs['active'] = value is not False
260		else:
261	1	attrs[key] = value
262
263		# Get the next UID if the row is a new item
264	1	if attrs.get('text') and uid in (None, '', settings.PLACEHOLDER):
265	1	uid = UID(
266		document.prefix, document.sep, document.next_number, document.digits
267		)
268
269	1	# Convert the row to an item
270		if uid and uid != settings.PLACEHOLDER:
271
272	1	# Delete the old item
273	1	try:
274	1	item = document.find_item(uid)
275	1	except DoorstopError:
276		log.debug("not yet an item: {}".format(uid))
277	1	else:
278	1	log.debug("deleting old item: {}".format(uid))
279		item.delete()
280
281	1	# Import the item
282	1	try:
283		item = add_item(document.prefix, uid, attrs=attrs, document=document)
284	1	except DoorstopError as exc:
285	1	log.warning(exc)
286
287
288	1	def _split_list(value):
289		"""Split a string list into parts."""
290	1	if value:
291	1	return [p for p in LIST_SEP_RE.split(value) if p]
292		else:
293	1	return []
294
295
296		# Mapping from file extension to file reader
297	1	FORMAT_FILE = {
298		'.yml': _file_yml,
299		'.csv': _file_csv,
300		'.tsv': _file_tsv,
301		'.xlsx': _file_xlsx,
302		}
303	1
304
305		def check(ext):
306		"""Confirm an extension is supported for import.
307
308		:raise DoorstopError: for unknown formats
309
310		:return: file importer if available
311	1
312	1	"""
313	1	exts = ', '.join(ext for ext in FORMAT_FILE)
314	1	msg = "unknown import format: {} (options: {})".format(ext or None, exts)
315	1	exc = DoorstopError(msg)
316	1	try:
317	1	func = FORMAT_FILE[ext]
318		except KeyError:
319	1	raise exc from None
320	1	else:
321		log.debug("found file reader for: {}".format(ext))
322		return func
323

jacebrowning / doorstop

doorstop.core.importer._itemize() F last analyzed 2019-08-12 00:16 UTC

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like

doorstop.core.importer._itemize() F
last analyzed 2019-08-12 00:16 UTC