doorstop.core.exporter._tabulate() - Code Metrics - doorstop-dev/doorstop - Measure and Improve Code Quality continuously with Scrutinizer

doorstop.core.exporter._tabulate() F
last analyzed 2024-04-10 14:36 UTC

↳ Parent: doorstop.core.exporter

Complexity

Conditions

Size

Total Lines	82
Code Lines	50

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	50
dl	0
loc	82
rs	0
c	0
b	0
f	0
cc	20
nop	3

How to fix Long Method Complexity

# SPDX-License-Identifier: LGPL-3.0-only

"""Functions to export documents and items."""

import datetime
import os
from collections import defaultdict
from typing import Any, Dict

import openpyxl
import yaml

from doorstop import common, settings
from doorstop.common import DoorstopError
from doorstop.core.types import iter_documents, iter_items

# Default separator used by the tabular exporters when a list of values
# (such as an item's link UIDs) is joined into one cell.
LIST_SEP = "\n"  # string separating list values when joined in a string

# Column sizing constants used when building XLSX worksheets.
XLSX_MAX_WIDTH = 65.0  # maximum width for a column
XLSX_FILTER_PADDING = 3.5  # column padding to account for filter button

# Module-level logger obtained from doorstop's common logging helper.
log = common.logger(__name__)


def export(obj, path, ext=None, **kwargs):
    """Write an object out in the requested format.

    Two calling conventions are supported:

    1. a document or item-like object together with an output file path
    2. a tree-like object together with an output directory path

    :param obj: (1) Item, list of Items, Document or (2) Tree
    :param path: (1) output file path or (2) output directory path
    :param ext: file extension overriding the one taken from ``path``

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: output location if files created, else None

    """
    # Format precedence: explicit override, then the path's suffix, then CSV
    if not ext:
        ext = os.path.splitext(path)[-1] or ".csv"
    check(ext)

    # Write every document that the object expands to
    exported = 0
    for document, target in iter_documents(obj, path, ext):
        exported += 1

        common.create_dirname(target)
        log.info("exporting to {}...".format(target))
        if ext not in FORMAT_LINES:
            # formats that produce the output file themselves
            export_file(document, target, ext, **kwargs)
        else:
            # line-based formats are generated here and written as text
            content = export_lines(document, ext, **kwargs)
            common.write_lines(content, target, end=settings.WRITE_LINESEPERATOR)

    # Report the outcome
    if not exported:
        log.warning("nothing to export")
        return None

    plural = "s" if exported > 1 else ""
    log.info("exported to {} file{}".format(exported, plural))
    return path


def export_lines(obj, ext=".yml", **kwargs):
    """Generate the lines of an export in the given format.

    Being a generator, nothing (including the format lookup) runs until the
    result is iterated.

    :param obj: Item, list of Items, or Document to export
    :param ext: file extension selecting the output format

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: lines generator

    """
    generator = check(ext, get_lines_gen=True)
    message = "yielding {} as lines of {}...".format(obj, ext)
    log.debug(message)
    yield from generator(obj, **kwargs)


def export_file(obj, path, ext=None, **kwargs):
    """Write an export of the object to a file in the given format.

    :param obj: Item, list of Items, or Document to export
    :param path: output file location with desired extension
    :param ext: file extension overriding the extension of ``path``

    :raises: :class:`doorstop.common.DoorstopError` for unknown file formats

    :return: path to created file

    """
    if not ext:
        ext = os.path.splitext(path)[-1]
    writer = check(ext, get_file_func=True)
    log.debug("converting %s to file format %s...", obj, ext)
    try:
        created = writer(obj, path, **kwargs)
    except OSError:  # IOError is an alias of OSError on Python 3
        # report write failures as a doorstop error without the OS traceback
        message = "unable to write to: {}".format(path)
        raise common.DoorstopFileError(message) from None
    return created


def _lines_yaml(obj, **_):
    """Generate YAML text for each exported item.

    Every item becomes one YAML mapping from its UID string to its data.
    Extra keyword arguments are accepted and ignored.

    :param obj: Item, list of Items, or Document to export

    :return: iterator of lines of text

    """
    for entry in iter_items(obj):
        yield yaml.dump(
            {str(entry.uid): entry.data},
            default_flow_style=False,
            allow_unicode=True,
        )


def _tabulate_header(obj):
    """Collect the column names (without the leading 'uid') for an export.

    :param obj: Item, list of Items, or Document to export

    :return: list of column names

    """
    header = ["level", "text", "ref", "links"]

    # 'at_least_one_ref' records whether any item still has a non-empty,
    # deprecated 'ref' field.
    at_least_one_ref = False
    for item in iter_items(obj):
        data = item.data

        # Extra attributes become columns in sorted order per item
        for name in sorted(data):
            if name not in header:
                header.append(name)

        if data.get("ref"):
            at_least_one_ref = True

    if "references" in header:
        # Move the 'references' column directly after the 'ref' column.
        header.insert(3, header.pop(header.index("references")))

        # NOTE(review): 'ref' is only dropped when a 'references' column
        # exists; exports without any 'references' always keep 'ref'.
        if not at_least_one_ref:
            header.remove("ref")

    return header


def _format_references(references):
    """Render an item's 'references' list as one multi-line string.

    Each reference becomes ``type:<type>,path:<path>`` with an optional
    ``,keyword:<keyword>`` suffix; references are joined with newlines.

    :param references: list of reference mappings, or None

    :return: formatted string ("" when there are no references)

    """
    if references is None:
        return ""

    lines = []
    for reference in references:
        line = "type:{},path:{}".format(reference["type"], reference["path"])
        if "keyword" in reference:
            line += ",keyword:{}".format(reference["keyword"])
        lines.append(line)
    return "\n".join(lines)


def _tabulate_cell(item, data, key, sep):
    """Compute the exported value of one column for one item.

    :param item: item being exported
    :param data: the item's data mapping (fetched once per row by the caller)
    :param key: column name
    :param sep: string separating link identifiers

    :return: cell value

    """
    value = data.get(key)
    if key == "level":
        # some levels are floats for YAML presentation
        return str(value)
    if key == "links":
        # separate identifiers with a delimiter
        return sep.join(uid.string for uid in item.links)
    if key == "references":
        return _format_references(value)
    if isinstance(value, str) and key != "reviewed":
        # take the value through the item rather than the raw data
        # (per the original comment: removes sentence boundaries and wrapping)
        return item.get(key)
    if value is None:
        return ""
    return value


def _tabulate(obj, sep=LIST_SEP, auto=False):
    """Yield lines of header/data for tabular export.

    The first row is the header (``uid`` followed by the collected column
    names); one row per item follows. ``sep`` only affects the 'links'
    column; 'references' entries are always joined with a newline.

    :param obj: Item, list of Items, or Document to export
    :param sep: string separating list values when joined in a string
    :param auto: include placeholders for new items on import

    :return: iterator of rows of data

    """
    header = _tabulate_header(obj)
    yield ["uid"] + header

    for item in iter_items(obj):
        data = item.data
        yield [item.uid] + [_tabulate_cell(item, data, key, sep) for key in header]

    # Yield placeholders for new items
    if auto:
        for _ in range(settings.PLACEHOLDER_COUNT):
            yield [settings.PLACEHOLDER]


def _file_csv(obj, path, delimiter=",", auto=False):
    """Write the tabulated export of an object as a CSV file.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export CSV file
    :param delimiter: character to delimit fields
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    rows = _tabulate(obj, auto=auto)
    options = {"delimiter": delimiter, "newline": "", "encoding": "utf-8"}
    return common.write_csv(rows, path, **options)


def _file_tsv(obj, path, auto=False):
    """Write the tabulated export of an object as a TSV file.

    This is the CSV writer with a tab as the field delimiter.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export TSV file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    tab = "\t"
    return _file_csv(obj, path, delimiter=tab, auto=auto)


def _file_xlsx(obj, path, auto=False):
    """Build an XLSX workbook for an object and save it to a path.

    :param obj: Item, list of Items, or Document to export
    :param path: location to export XLSX file
    :param auto: include placeholders for new items on import

    :return: path of created file

    """
    _get_xlsx(obj, auto).save(path)
    return path


def _get_xlsx(obj, auto):
    """Create an XLSX workbook object.

    The active worksheet receives one row per tabulated row, with wrapped,
    top-left aligned cells, a bold header row, an auto-filter across the
    header, content-based column widths and a frozen top row.

    :param obj: Item, list of Items, or Document to export
    :param auto: include placeholders for new items on import

    :return: new workbook

    """
    # widest line seen so far per 1-based column index
    col_widths: Dict[Any, float] = defaultdict(float)

    # Create a new workbook
    workbook = openpyxl.Workbook()
    worksheet = workbook.active

    # Style objects are the same for every cell, so build them only once
    alignment = openpyxl.styles.Alignment(
        vertical="top", horizontal="left", wrap_text=True
    )
    header_font = openpyxl.styles.Font(bold=True)

    # Populate cells
    for row, data in enumerate(_tabulate(obj, auto=auto), start=1):
        for col_idx, value in enumerate(data, start=1):
            cell = worksheet.cell(column=col_idx, row=row)

            # wrap text in every cell
            cell.alignment = alignment
            # and bold header rows
            if row == 1:
                cell.font = header_font

            # convert incompatible Excel types:
            # http://pythonhosted.org/openpyxl/api.html#openpyxl.cell.Cell.value
            if isinstance(value, (int, float, datetime.datetime)):
                cell.value = value
            else:
                cell.value = str(value)

            # track cell width
            col_widths[col_idx] = max(col_widths[col_idx], _width(str(value)))

    # Add filter up to the last column
    col_letter = openpyxl.utils.get_column_letter(len(col_widths))
    worksheet.auto_filter.ref = "A1:%s1" % col_letter

    # Set column width based on column contents
    for col_idx, content_width in col_widths.items():
        # NOTE(review): padding is only added below the cap, so a padded
        # width may end up slightly above XLSX_MAX_WIDTH.
        if content_width > XLSX_MAX_WIDTH:
            width = XLSX_MAX_WIDTH
        else:
            width = content_width + XLSX_FILTER_PADDING
        col_letter = openpyxl.utils.get_column_letter(col_idx)
        worksheet.column_dimensions[col_letter].width = width

    # Freeze top row
    worksheet.freeze_panes = worksheet.cell(row=2, column=1)

    return workbook


def _width(text):
    """Get the maximum length in a multiline string."""
    if text:
        return max(len(line) for line in text.splitlines())
    else:
        return 0


# Line-based exporters, keyed by file extension
FORMAT_LINES = {".yml": _lines_yaml}
# File-writing exporters, keyed by file extension
FORMAT_FILE = {
    ".csv": _file_csv,
    ".tsv": _file_tsv,
    ".xlsx": _file_xlsx,
}
# Every supported export format (line-based entries first)
FORMAT = {**FORMAT_LINES, **FORMAT_FILE}  # type: ignore


def check(ext, get_lines_gen=False, get_file_func=False):
    """Confirm an extension is supported for export.

    With neither flag set, the extension is only validated against every
    known format. ``get_lines_gen`` takes precedence over ``get_file_func``
    when both are set.

    :param ext: file extension (with leading dot) to look up
    :param get_lines_gen: return a lines generator if available
    :param get_file_func: return a file creator if available

    :raises: :class:`doorstop.common.DoorstopError` for unknown formats

    :return: function requested if available, else None

    """

    def unknown(kind, formats):
        # The message is built in a single format() call so that braces in
        # 'ext' are never re-interpreted as replacement fields.
        options = ", ".join(formats)
        message = "unknown {} format: {} (options: {})".format(
            kind, ext or None, options
        )
        return DoorstopError(message)

    if get_lines_gen:
        try:
            gen = FORMAT_LINES[ext]
        except KeyError:
            raise unknown("lines export", FORMAT_LINES) from None
        else:
            log.debug("found lines generator for: {}".format(ext))
            return gen

    if get_file_func:
        try:
            func = FORMAT_FILE[ext]
        except KeyError:
            raise unknown("file export", FORMAT_FILE) from None
        else:
            log.debug("found file creator for: {}".format(ext))
            return func

    if ext not in FORMAT:
        raise unknown("export", FORMAT)

    return None


1			# SPDX-License-Identifier: LGPL-3.0-only
2
3			"""Functions to export documents and items."""
4
5			import datetime
6			import os
7			from collections import defaultdict
8			from typing import Any, Dict
9
10			import openpyxl
11			import yaml
12
13			from doorstop import common, settings
14			from doorstop.common import DoorstopError
15			from doorstop.core.types import iter_documents, iter_items
16
17			LIST_SEP = "\n" # string separating list values when joined in a string
18
19			XLSX_MAX_WIDTH = 65.0 # maximum width for a column
20			XLSX_FILTER_PADDING = 3.5 # column padding to account for filter button
21
22			log = common.logger(__name__)
23
24
25			def export(obj, path, ext=None, **kwargs):
26			"""Export an object to a given format.
27
28			The function can be called in two ways:
29
30			1. document or item-like object + output file path
31			2. tree-like object + output directory path
32
33			:param obj: (1) Item, list of Items, Document or (2) Tree
34			:param path: (1) output file path or (2) output directory path
35			:param ext: file extension to override output extension
36
37			:raises: :class:`doorstop.common.DoorstopError` for unknown file formats
38
39			:return: output location if files created, else None
40
41			"""
42			# Determine the output format
43			ext = ext or os.path.splitext(path)[-1] or ".csv"
44			check(ext)
45
46			# Export documents
47			count = 0
48			for obj2, path2 in iter_documents(obj, path, ext):
49			count += 1
50
51			# Export content to the specified path
52			common.create_dirname(path2)
53			log.info("exporting to {}...".format(path2))
54			if ext in FORMAT_LINES:
55			lines = export_lines(obj2, ext, **kwargs)
56			common.write_lines(lines, path2, end=settings.WRITE_LINESEPERATOR)
57			else:
58			export_file(obj2, path2, ext, **kwargs)
59
60			# Return the exported path
61			if count:
62			msg = "exported to {} file{}".format(count, "s" if count > 1 else "")
63			log.info(msg)
64			return path
65			else:
66			log.warning("nothing to export")
67			return None
68
69
70			def export_lines(obj, ext=".yml", **kwargs):
71			"""Yield lines for an export in the specified format.
72
73			:param obj: Item, list of Items, or Document to export
74			:param ext: file extension to specify the output format
75
76			:raises: :class:`doorstop.common.DoorstopError` for unknown file formats
77
78			:return: lines generator
79
80			"""
81			gen = check(ext, get_lines_gen=True)
82			log.debug("yielding {} as lines of {}...".format(obj, ext))
83			yield from gen(obj, **kwargs)
84
85
86			def export_file(obj, path, ext=None, **kwargs):
87			"""Create a file object for an export in the specified format.
88
89			:param obj: Item, list of Items, or Document to export
90			:param path: output file location with desired extension
91			:param ext: file extension to override output path's extension
92
93			:raises: :class:`doorstop.common.DoorstopError` for unknown file formats
94
95			:return: path to created file
96
97			"""
98			ext = ext or os.path.splitext(path)[-1]
99			func = check(ext, get_file_func=True)
100			log.debug("converting %s to file format %s...", obj, ext)
101			try:
102			return func(obj, path, **kwargs)
103			except IOError:
104			msg = "unable to write to: {}".format(path)
105			raise common.DoorstopFileError(msg) from None
106
107
108			def _lines_yaml(obj, **_):
109			"""Yield lines for a YAML export.
110
111			:param obj: Item, list of Items, or Document to export
112
113			:return: iterator of lines of text
114
115			"""
116			for item in iter_items(obj):
117			data = {str(item.uid): item.data}
118			text = yaml.dump(data, default_flow_style=False, allow_unicode=True)
119			yield text
120
121
122			def _tabulate(obj, sep=LIST_SEP, auto=False):
123			"""Yield lines of header/data for tabular export.
124
125			:param obj: Item, list of Items, or Document to export
126			:param sep: string separating list values when joined in a string
127			:param auto: include placeholders for new items on import
128
129			:return: iterator of rows of data
130
131			"""
132
133			header = ["level", "text", "ref", "links"]
134
135			# 'at_least_one_ref' detects if at least one of the items still have a deprecated 'ref' field.
136			# If there is none, 'ref' header is excluded from the headers and is not exported.
137			at_least_one_ref = False
138			for item in iter_items(obj):
139			data = item.data
140
141			for value in sorted(data.keys()):
142			if value not in header:
143			header.append(value)
144
145			ref_value = data.get("ref")
146			if ref_value:
147			at_least_one_ref = True
148
149			try:
150			reference_index = header.index("references")
151
152			# Inserting 'references' header after the 'ref' header.
153			header.insert(3, header.pop(reference_index))
154
155			if not at_least_one_ref:
156			header.remove("ref")
157			except ValueError:
158			pass
159
160			yield ["uid"] + header
161
162			for item in iter_items(obj):
163			data = item.data
164
165			# Yield row
166			row = [item.uid]
167			for key in header:
168			value = data.get(key)
169			if key == "level":
170			# some levels are floats for YAML presentation
171			value = str(value)
172			elif key == "links":
173			# separate identifiers with a delimiter
174			value = sep.join(uid.string for uid in item.links)
175			elif key == "references":
176			if value is None:
177			value = ""
178			else:
179			ref_strings = []
180			for ref_item in value:
181			ref_type = ref_item["type"]
182			ref_path = ref_item["path"]
183
184			ref_string = "type:{},path:{}".format(ref_type, ref_path)
185
186			if "keyword" in ref_item:
187			keyword = ref_item["keyword"]
188			ref_string += ",keyword:{}".format(keyword)
189
190			ref_strings.append(ref_string)
191			value = "\n".join(ref_string for ref_string in ref_strings)
192			elif isinstance(value, str) and key not in ("reviewed",):
193			# remove sentence boundaries and line wrapping
194			value = item.get(key)
195			elif value is None:
196			value = ""
197			row.append(value)
198			yield row
199
200			# Yield placeholders for new items
201			if auto:
202			for _ in range(settings.PLACEHOLDER_COUNT):
203			yield [settings.PLACEHOLDER]
204
205
206			def _file_csv(obj, path, delimiter=",", auto=False):
207			"""Create a CSV file at the given path.
208
209			:param obj: Item, list of Items, or Document to export
210			:param path: location to export CSV file
211			:param delimiter: character to delimit fields
212			:param auto: include placeholders for new items on import
213
214			:return: path of created file
215
216			"""
217			return common.write_csv(
218			_tabulate(obj, auto=auto),
219			path,
220			delimiter=delimiter,
221			newline="",
222			encoding="utf-8",
223			)
224
225
226			def _file_tsv(obj, path, auto=False):
227			"""Create a TSV file at the given path.
228
229			:param obj: Item, list of Items, or Document to export
230			:param path: location to export TSV file
231			:param auto: include placeholders for new items on import
232
233			:return: path of created file
234
235			"""
236			return _file_csv(obj, path, delimiter="\t", auto=auto)
237
238
239			def _file_xlsx(obj, path, auto=False):
240			"""Create an XLSX file at the given path.
241
242			:param obj: Item, list of Items, or Document to export
243			:param path: location to export XLSX file
244			:param auto: include placeholders for new items on import
245
246			:return: path of created file
247
248			"""
249			workbook = _get_xlsx(obj, auto)
250			workbook.save(path)
251
252			return path
253
254
255			def _get_xlsx(obj, auto):
256			"""Create an XLSX workbook object.
257
258			:param obj: Item, list of Items, or Document to export
259			:param auto: include placeholders for new items on import
260
261			:return: new workbook
262
263			"""
264			col_widths: Dict[Any, float] = defaultdict(float)
265			col = "A"
266
267			# Create a new workbook
268			workbook = openpyxl.Workbook()
269			worksheet = workbook.active
270
271			# Populate cells
272			for row, data in enumerate(_tabulate(obj, auto=auto), start=1):
273			for col_idx, value in enumerate(data, start=1):
274			cell = worksheet.cell(column=col_idx, row=row)
275
276			# wrap text in every cell
277			alignment = openpyxl.styles.Alignment(
278			vertical="top", horizontal="left", wrap_text=True
279			)
280			cell.alignment = alignment
281			# and bold header rows
282			if row == 1:
283			cell.font = openpyxl.styles.Font(bold=True)
284
285			# convert incompatible Excel types:
286			# http://pythonhosted.org/openpyxl/api.html#openpyxl.cell.Cell.value
287			if isinstance(value, (int, float, datetime.datetime)):
288			cell.value = value
289			else:
290			cell.value = str(value)
291
292			# track cell width
293			col_widths[col_idx] = max(col_widths[col_idx], _width(str(value)))
294
295			# Add filter up to the last column
296			col_letter = openpyxl.utils.get_column_letter(len(col_widths))
297			worksheet.auto_filter.ref = "A1:%s1" % col_letter
298
299			# Set column width based on column contents
300			for col in col_widths:
301			if col_widths[col] > XLSX_MAX_WIDTH:
302			width = XLSX_MAX_WIDTH
303			else:
304			width = col_widths[col] + XLSX_FILTER_PADDING
305			col_letter = openpyxl.utils.get_column_letter(col)
306			worksheet.column_dimensions[col_letter].width = width
307
308			# Freeze top row
309			worksheet.freeze_panes = worksheet.cell(row=2, column=1)
310
311			return workbook
312
313
314			def _width(text):
315			"""Get the maximum length in a multiline string."""
316			if text:
317			return max(len(line) for line in text.splitlines())
318			else:
319			return 0
320
321
322			# Mapping from file extension to lines generator
323			FORMAT_LINES = {".yml": _lines_yaml}
324			# Mapping from file extension to file generator
325			FORMAT_FILE = {".csv": _file_csv, ".tsv": _file_tsv, ".xlsx": _file_xlsx}
326			# Union of format dictionaries
327			FORMAT = dict(list(FORMAT_LINES.items()) + list(FORMAT_FILE.items())) # type: ignore
328
329
330			def check(ext, get_lines_gen=False, get_file_func=False):
331			"""Confirm an extension is supported for export.
332
333			:param get_lines_func: return a lines generator if available
334			:param get_file_func: return a file creator if available
335
336			:raises: :class:`doorstop.common.DoorstopError` for unknown formats
337
338			:return: function requested if available
339
340			"""
341			exts = ", ".join(ext for ext in FORMAT)
342			lines_exts = ", ".join(ext for ext in FORMAT_LINES)
343			file_exts = ", ".join(ext for ext in FORMAT_FILE)
344			fmt = "unknown {{}} format: {} (options: {{}})".format(ext or None)
345
346			if get_lines_gen:
347			try:
348			gen = FORMAT_LINES[ext]
349			except KeyError:
350			exc = DoorstopError(fmt.format("lines export", lines_exts))
351			raise exc from None
352			else:
353			log.debug("found lines generator for: {}".format(ext))
354			return gen
355
356			if get_file_func:
357			try:
358			func = FORMAT_FILE[ext]
359			except KeyError:
360			exc = DoorstopError(fmt.format("file export", file_exts))
361			raise exc from None
362			else:
363			log.debug("found file creator for: {}".format(ext))
364			return func
365
366			if ext not in FORMAT:
367			exc = DoorstopError(fmt.format("export", exts))
368			raise exc
369
370			return None
371

doorstop-dev / doorstop

doorstop.core.exporter._tabulate() F last analyzed 2024-04-10 14:36 UTC

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like

doorstop.core.exporter._tabulate() F
last analyzed 2024-04-10 14:36 UTC