| Metric | Value |
| --- | --- |
| Total Complexity | 59 |
| Total Lines | 246 |
| Duplicated Lines | 26.83 % |
| Changes | 0 |
Duplicate code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.
Complex classes like `typeddfs.utils.io_utils` often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined which fields and methods belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
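In this module, the two methods flagged as duplicated below (`verify_can_read_files` and `verify_can_write_files`) share the `verify_can_` prefix and differ only in the access flag, open mode, and error type. The following is a minimal sketch of one possible Extract Class refactoring against the code shown below; the class name `FilePermissionChecks`, the helper `_verify_files`, and its parameters are illustrative only and are not part of typed-dfs.

```python
from __future__ import annotations

import os
from pathlib import Path

from typeddfs.df_errors import ReadPermissionsError, WritePermissionsError


class FilePermissionChecks:
    """Hypothetical class extracted from IoUtils, holding the verify_can_* methods."""

    @classmethod
    def verify_can_read_files(cls, *paths: str | Path, missing_ok: bool = False, attempt: bool = False) -> None:
        cls._verify_files(*paths, flag=os.R_OK, open_mode="r", err=ReadPermissionsError,
                          verb="read", missing_ok=missing_ok, attempt=attempt)

    @classmethod
    def verify_can_write_files(cls, *paths: str | Path, missing_ok: bool = False, attempt: bool = False) -> None:
        cls._verify_files(*paths, flag=os.W_OK, open_mode="a", err=WritePermissionsError,
                          verb="write", missing_ok=missing_ok, attempt=attempt)

    @classmethod
    def _verify_files(cls, *paths: str | Path, flag: int, open_mode: str, err: type[Exception],
                      verb: str, missing_ok: bool, attempt: bool) -> None:
        # Shared body of the two near-duplicate methods flagged in this report.
        # The errors are constructed with key=..., mirroring the calls in the listing below.
        for path in (Path(p) for p in paths):
            if path.exists() and not path.is_file():
                raise err(f"Path {path} is not a file", key=str(path))
            if (not missing_ok or path.exists()) and not os.access(path, flag):
                raise err(f"Cannot {verb} {path}", key=str(path))
            if attempt:
                try:
                    with path.open(open_mode):
                        pass
                except OSError as e:
                    raise err(f"Failed to open {path} for {verb}", key=str(path)) from e
```

`IoUtils` could then delegate to this class (or re-export its methods) so existing call sites keep working.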
The source of `typeddfs.utils.io_utils` follows:

```python
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to typed-dfs
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/typed-dfs
# SPDX-License-Identifier: Apache-2.0
"""
Tools for IO.
"""
from __future__ import annotations

import os
import sys
from datetime import datetime
from pathlib import Path, PurePath
from typing import TYPE_CHECKING, Any

from pandas.io.common import get_handle

from typeddfs.df_errors import (
    ReadPermissionsError,
    UnsupportedOperationError,
    WritePermissionsError,
)
from typeddfs.file_formats import CompressionFormat, FileFormat
from typeddfs.utils._utils import PathLike

if TYPE_CHECKING:
    from pandas._typing import BaseBuffer, FilePath


class IoUtils:
    # NOTE: duplicated block flagged by this report
    @classmethod
    def verify_can_read_files(
        cls,
        *paths: str | Path,
        missing_ok: bool = False,
        attempt: bool = False,
    ) -> None:
        """
        Checks that all files can be read, to ensure atomicity before operations.

        Args:
            *paths: The files
            missing_ok: Don't raise an error if a path doesn't exist
            attempt: Actually try opening

        Raises:
            ReadPermissionsError: If a path is not a file (modulo existence) or doesn't have 'R' set
        """
        paths = [Path(p) for p in paths]
        for path in paths:
            if path.exists() and not path.is_file():
                msg = f"Path {path} is not a file"
                raise ReadPermissionsError(msg, key=str(path))
            if (not missing_ok or path.exists()) and not os.access(path, os.R_OK):
                msg = f"Cannot read from {path}"
                raise ReadPermissionsError(msg, key=str(path))
            if attempt:
                try:
                    with path.open():
                        pass
                except OSError as e:
                    msg = f"Failed to open {path} for read"
                    raise ReadPermissionsError(msg, key=str(path)) from e

    # NOTE: duplicated block flagged by this report
    @classmethod
    def verify_can_write_files(
        cls,
        *paths: str | Path,
        missing_ok: bool = False,
        attempt: bool = False,
    ) -> None:
        """
        Checks that all files can be written to, to ensure atomicity before operations.

        Args:
            *paths: The files
            missing_ok: Don't raise an error if a path doesn't exist
            attempt: Actually try opening

        Raises:
            WritePermissionsError: If a path is not a file (modulo existence) or doesn't have 'W' set
        """
        paths = [Path(p) for p in paths]
        for path in paths:
            if path.exists() and not path.is_file():
                msg = f"Path {path} is not a file"
                raise WritePermissionsError(msg, key=str(path))
            if (not missing_ok or path.exists()) and not os.access(path, os.W_OK):
                msg = f"Cannot write to {path}"
                raise WritePermissionsError(msg, key=str(path))
            if attempt:
                try:
                    with path.open("a"):  # or "w"
                        pass
                except OSError as e:
                    msg = f"Failed to open {path} for write"
                    raise WritePermissionsError(msg, key=str(path)) from e

    @classmethod
    def verify_can_write_dirs(cls, *paths: str | Path, missing_ok: bool = False) -> None:
        """
        Checks that all directories can be written to, to ensure atomicity before operations.

        Args:
            *paths: The directories
            missing_ok: Don't raise an error if a path doesn't exist

        Raises:
            WritePermissionsError: If a path is not a directory (modulo existence) or doesn't have 'W' set
        """
        paths = [Path(p) for p in paths]
        for path in paths:
            if path.exists() and not path.is_dir():
                msg = f"Path {path} is not a dir"
                raise WritePermissionsError(msg, key=str(path))
            if missing_ok and not path.exists():
                continue
            if not os.access(path, os.W_OK):
                msg = f"{path} lacks write permission"
                raise WritePermissionsError(msg, key=str(path))
            if not os.access(path, os.X_OK):
                msg = f"{path} lacks access permission"
                raise WritePermissionsError(msg, key=str(path))

    @classmethod
    def write(
        cls,
        path_or_buff: FilePath | BaseBuffer,
        content,
        *,
        mode: str = "w",
        atomic: bool = False,
        compression_kwargs: dict[str, Any] | None = None,
        **kwargs,
    ) -> str | None:
        """
        Writes using Pandas's ``get_handle``.
        By default (unless ``compression=`` is set), infers the compression type from the filename suffix
        (e.g. ``.csv.gz``).
        """
        if compression_kwargs is None:
            compression_kwargs = {}
        if atomic and "a" in mode:
            msg = "Can't append in atomic write"
            raise UnsupportedOperationError(msg)
        if path_or_buff is None:
            return content
        compression = cls.path_or_buff_compression(path_or_buff, kwargs)
        kwargs = {**kwargs, "compression": compression.pandas_value}
        if atomic and isinstance(path_or_buff, PathLike):
            # Atomic write: write to a temp file in the same directory, then swap it into place.
            path = Path(path_or_buff)
            tmp = cls.tmp_path(path)
            with get_handle(tmp, mode, **kwargs) as f:
                f.handle.write(content)
            os.replace(tmp, path)
            return None
        with get_handle(path_or_buff, mode, **kwargs) as f:
            f.handle.write(content)
        return None

    @classmethod
    def read(cls, path_or_buff, *, mode: str = "r", **kwargs) -> str:
        """
        Reads using Pandas's ``get_handle``.
        By default (unless ``compression=`` is set), infers the compression type from the filename suffix
        (e.g. ``.csv.gz``).
        """
        compression = cls.path_or_buff_compression(path_or_buff, kwargs)
        kwargs = {**kwargs, "compression": compression.pandas_value}
        with get_handle(path_or_buff, mode, **kwargs) as f:
            return f.handle.read()

    @classmethod
    def path_or_buff_compression(cls, path_or_buff, kwargs) -> CompressionFormat:
        if "compression" in kwargs:
            return CompressionFormat.of(kwargs["compression"])
        elif isinstance(path_or_buff, PurePath | str):
            return CompressionFormat.from_path(path_or_buff)
        return CompressionFormat.none

    @classmethod
    def is_binary(cls, path: PathLike) -> bool:
        path = Path(path)
        if CompressionFormat.from_path(path).is_compressed:
            return True
        return FileFormat.from_path(path).is_binary

    @classmethod
    def tmp_path(cls, path: PathLike, extra: str = "tmp") -> Path:
        # Timestamped hidden temp name; datetime.isoformat supports at most microsecond precision.
        now = datetime.now().isoformat(timespec="microseconds").replace(":", "").replace("-", "")
        path = Path(path)
        suffix = "".join(path.suffixes)
        return path.parent / (".__" + extra + "." + now + suffix)

    @classmethod
    def get_encoding(cls, encoding: str = "utf-8") -> str:
        """
        Returns a text encoding from a more flexible string.
        Ignores hyphens and lowercases the string.
        Permits these nonstandard shorthands:

          - ``"platform"``: use ``sys.getdefaultencoding()`` on the fly
          - ``"utf8(bom)"``: use ``"utf-8-sig"`` on Windows; ``"utf-8"`` otherwise
          - ``"utf16(bom)"``: use ``"utf-16-sig"`` on Windows; ``"utf-16"`` otherwise
          - ``"utf32(bom)"``: use ``"utf-32-sig"`` on Windows; ``"utf-32"`` otherwise
        """
        e = encoding.lower().replace("-", "")
        if e == "platform":
            return sys.getdefaultencoding()
        if e == "utf8(bom)":
            return "utf-8-sig" if os.name == "nt" else "utf-8"
        if e == "utf16(bom)":
            return "utf-16-sig" if os.name == "nt" else "utf-16"
        if e == "utf32(bom)":
            return "utf-32-sig" if os.name == "nt" else "utf-32"
        if e in {"utf8", "utf-8"}:
            return "utf-8"
        if e in {"utf16", "utf-16"}:
            return "utf-16"
        if e in {"utf32", "utf-32"}:
            return "utf-32"
        return encoding

    @classmethod
    def get_encoding_errors(cls, errors: str | None) -> str | None:
        """
        Returns the value passed as ``errors=`` in ``open``.

        Raises:
            ValueError: If invalid
        """
        if errors is None:
            return "strict"
        if errors in (
            "strict",
            "ignore",
            "replace",
            "xmlcharrefreplace",
            "backslashreplace",
            "namereplace",
            "surrogateescape",
            "surrogatepass",
        ):
            return errors
        msg = f"Invalid value {errors} for errors"
        raise ValueError(msg)


__all__ = ["IoUtils"]
```
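For orientation, here is a short usage sketch of the class above. The file name and contents are made up; it assumes typed-dfs and pandas are installed and that `PathLike` accepts `pathlib.Path` objects. Compression is inferred from the `.gz` suffix as described in the `write`/`read` docstrings.

```python
from pathlib import Path

from typeddfs.utils.io_utils import IoUtils

target = Path("example.txt.gz")  # hypothetical file; the .gz suffix implies gzip compression

# Check permissions up front, then write atomically (temp file + os.replace).
IoUtils.verify_can_write_files(target, missing_ok=True)
IoUtils.write(target, "hello, world\n", atomic=True)

# Read it back; the compression is again inferred from the suffix.
assert IoUtils.read(target) == "hello, world\n"

# Encoding helpers normalize flexible spellings.
print(IoUtils.get_encoding("UTF-8"))      # "utf-8"
print(IoUtils.get_encoding("utf8(bom)"))  # "utf-8-sig" on Windows, "utf-8" elsewhere
print(IoUtils.get_encoding_errors(None))  # "strict"
```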