Passed
Push — main ( c9ac86...a4501a )
by Douglas
02:00
created

FilesysTools.get_info()   C

Complexity

Conditions 9

Size

Total Lines 40
Code Lines 37

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 37
nop 5
dl 0
loc 40
rs 6.6586
c 0
b 0
f 0
1
import gzip
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import hashlib
3
import importlib.metadata
0 ignored issues
show
Bug introduced by
The name metadata does not seem to exist in module importlib.
Loading history...
introduced by
Unable to import 'importlib.metadata'
Loading history...
4
import locale
5
import logging
6
import os
7
import platform
8
import shutil
9
import socket
10
import struct
11
import sys
12
import stat
13
import tempfile
14
from contextlib import contextmanager
15
from dataclasses import dataclass
16
from datetime import datetime, timezone
17
from getpass import getuser
18
from pathlib import Path, PurePath
19
import pathlib
20
from typing import Any, Generator, Iterable, Mapping, Optional, Sequence, SupportsBytes, Type, Union
21
22
from defusedxml import ElementTree
0 ignored issues
show
introduced by
Unable to import 'defusedxml'
Loading history...
23
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
24
import orjson
0 ignored issues
show
introduced by
Unable to import 'orjson'
Loading history...
25
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
26
import regex
0 ignored issues
show
introduced by
Unable to import 'regex'
Loading history...
27
28
from pocketutils.core.exceptions import (
29
    AlreadyUsedError,
30
    ContradictoryRequestError,
31
    FileDoesNotExistError,
32
    ParsingError,
33
    DirDoesNotExistError,
34
)
35
from pocketutils.core.hashers import *
0 ignored issues
show
Coding Style introduced by
The usage of wildcard imports like pocketutils.core.hashers should generally be avoided.
Loading history...
Unused Code introduced by
HashableFile was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
PrePostHashedFile was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
IllegalStateError was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
HashValidationError was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
Callable was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
PreHashedFile was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
PostHashedFile was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
NonHashedFile was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
warnings was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
Hasher was imported with wildcard, but is not used.
Loading history...
36
from pocketutils.core.input_output import OpenMode, PathLike, Writeable
37
from pocketutils.core.web_resource import *
0 ignored issues
show
Coding Style introduced by
The usage of wildcard imports like pocketutils.core.web_resource should generally be avoided.
Loading history...
Unused Code introduced by
zipfile was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
request was imported with wildcard, but is not used.
Loading history...
Unused Code introduced by
enum was imported with wildcard, but is not used.
Loading history...
38
from pocketutils.tools.base_tools import BaseTools
39
from pocketutils.tools.path_tools import PathTools
40
41
# Module-wide logger, registered under the package name rather than ``__name__``.
logger = logging.getLogger("pocketutils")
# Compression level constant; its consumers are not visible in this part of the file.
COMPRESS_LEVEL = 9
43
44
45
@dataclass(frozen=True, repr=True)
class PathInfo:
    """
    Info about an extant or nonexistent path as it was at some time.
    Use this to avoid making repeated filesystem calls (e.g. ``.is_dir()``):
    None of the properties defined here make OS calls.

    Attributes:
        source: The original path used for lookup; may be a symlink
        resolved: The fully resolved path, or None if it does not exist
        as_of: A datetime immediately before the system calls (system timezone)
        real_stat: ``os.stat_result``, or None if the path does not exist
        link_stat: ``os.stat_result``, or None if the path is not a symlink
        has_access: Path exists and has the 'a' flag set
        has_read: Path exists and has the 'r' flag set
        has_write: Path exists and has the 'w' flag set

    All of the additional properties refer to the resolved path,
    except for :meth:`is_symlink`, :meth:`is_valid_symlink`,
    and :meth:`is_broken_symlink`.
    """

    source: Path
    resolved: Optional[Path]
    as_of: datetime
    real_stat: Optional[os.stat_result]
    link_stat: Optional[os.stat_result]
    has_access: bool
    has_read: bool
    has_write: bool

    @property
    def mod_or_create_dt(self) -> Optional[datetime]:
        """
        Returns the modification or creation datetime.
        Uses whichever is available: creation on Windows and modification on Unix-like.
        Returns None if the path does not exist.
        """
        if os.name == "nt":
            return self._get_dt("st_ctime")
        # will work on posix; on java try anyway
        return self._get_dt("st_mtime")

    @property
    def mod_dt(self) -> Optional[datetime]:
        """
        Returns the modification datetime, if known.
        Returns None on Windows or if the path does not exist.
        """
        if os.name == "nt":
            return None
        return self._get_dt("st_mtime")

    @property
    def create_dt(self) -> Optional[datetime]:
        """
        Returns the creation datetime, if known.
        Returns None on Unix-like systems or if the path does not exist.
        """
        if os.name == "posix":
            return None
        return self._get_dt("st_ctime")

    @property
    def access_dt(self) -> Optional[datetime]:
        """
        Returns the access datetime.
        *Should* never return None if the path exists, but not guaranteed.
        """
        return self._get_dt("st_atime")

    @property
    def exists(self) -> bool:
        """
        Returns whether the resolved path exists.
        """
        return self.real_stat is not None

    @property
    def is_file(self) -> bool:
        """
        Returns whether the resolved path exists and is a regular file.
        """
        return self.exists and stat.S_ISREG(self.real_stat.st_mode)

    @property
    def is_dir(self) -> bool:
        """
        Returns whether the resolved path exists and is a directory.
        """
        return self.exists and stat.S_ISDIR(self.real_stat.st_mode)

    @property
    def is_readable_dir(self) -> bool:
        """
        Returns whether the resolved path is a directory with the access and read flags set.
        """
        # Must test is_dir: testing is_file here made this an alias of is_readable_file
        # and therefore always False for real directories.
        return self.is_dir and self.has_access and self.has_read

    @property
    def is_writeable_dir(self) -> bool:
        """
        Returns whether the resolved path is a directory with the access and write flags set.
        """
        return self.is_dir and self.has_access and self.has_write

    @property
    def is_readable_file(self) -> bool:
        """
        Returns whether the resolved path is a regular file with the access and read flags set.
        """
        return self.is_file and self.has_access and self.has_read

    @property
    def is_writeable_file(self) -> bool:
        """
        Returns whether the resolved path is a regular file with the access and write flags set.
        """
        return self.is_file and self.has_access and self.has_write

    @property
    def is_block_device(self) -> bool:
        """
        Returns whether the resolved path exists and is a block device.
        """
        return self.exists and stat.S_ISBLK(self.real_stat.st_mode)

    @property
    def is_char_device(self) -> bool:
        """
        Returns whether the resolved path exists and is a character device.
        """
        return self.exists and stat.S_ISCHR(self.real_stat.st_mode)

    @property
    def is_socket(self) -> bool:
        """
        Returns whether the resolved path exists and is a socket.
        """
        return self.exists and stat.S_ISSOCK(self.real_stat.st_mode)

    @property
    def is_fifo(self) -> bool:
        """
        Returns whether the resolved path exists and is a FIFO (named pipe).
        """
        return self.exists and stat.S_ISFIFO(self.real_stat.st_mode)

    @property
    def is_symlink(self) -> bool:
        """
        Returns whether the source path is a symlink (i.e. ``link_stat`` is set).
        """
        return self.link_stat is not None

    @property
    def is_valid_symlink(self) -> bool:
        """
        Returns whether the source path is a symlink whose target exists.
        """
        return self.is_symlink and self.exists

    @property
    def is_broken_symlink(self) -> bool:
        """
        Returns whether the source path is a symlink whose target does not exist.
        """
        return self.is_symlink and not self.exists

    def _get_dt(self, attr: str) -> Optional[datetime]:
        """
        Converts the timestamp attribute ``attr`` of ``real_stat`` to an aware local datetime.
        Returns None if ``real_stat`` is None.
        """
        if self.real_stat is None:
            return None
        sec = getattr(self.real_stat, attr)
        return datetime.fromtimestamp(sec).astimezone()
179
180
181
class FilesysTools(BaseTools):
0 ignored issues
show
best-practice introduced by
Too many public methods (25/20)
Loading history...
182
    """
183
    Tools for file/directory creation, etc.
184
185
    .. caution::
186
        Some functions may be insecure.
187
    """
188
189
    @classmethod
    def new_webresource(
        cls, url: str, archive_member: Optional[str], local_path: PathLike
    ) -> WebResource:
        """
        Builds a ``WebResource`` from the given arguments.

        Args:
            url: Forwarded as the first positional argument
            archive_member: Forwarded as the second positional argument; may be None
            local_path: Forwarded as the third positional argument

        Returns:
            The newly constructed ``WebResource``
        """
        resource = WebResource(url, archive_member, local_path)
        return resource
194
195
    @classmethod
196
    def is_linux(cls) -> bool:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
197
        return sys.platform == "linux"
198
199
    @classmethod
200
    def is_windows(cls) -> bool:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
201
        return sys.platform == "win32"
202
203
    @classmethod
204
    def is_macos(cls) -> bool:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
205
        return sys.platform == "darwin"
206
207
    @classmethod
    def get_info(
        cls, path: PathLike, *, expand_user: bool = False, strict: bool = False
    ) -> PathInfo:
        """
        Collects stat and permission information about ``path`` into a :class:`PathInfo`.

        Args:
            path: The path to inspect; it is wrapped in ``Path`` and may be a symlink
            expand_user: Call ``expanduser()`` on the path before resolving it
            strict: Passed to ``Path.resolve``

        Returns:
            A ``PathInfo``. If no stat result was obtained for ``path``, ``resolved`` and both
            stat fields are None and all three permission flags are False.
        """
        path = Path(path)
        can_ignore_errors = hasattr(pathlib, "_ignore_error")
        if not can_ignore_errors:
            logger.debug("No _ignore_error found; some OSErrors may be suppressed")
        # Timestamp is taken before any filesystem call is made.
        as_of = datetime.now().astimezone()
        # NOTE(review): __stat_raw is defined outside this view; presumably it stats
        # without following symlinks and returns None on failure -- confirm.
        raw_stat = None
        if can_ignore_errors or path.is_symlink() or path.exists():
            raw_stat = cls.__stat_raw(path)
        if raw_stat is None:
            return PathInfo(
                source=path,
                resolved=None,
                as_of=as_of,
                real_stat=None,
                link_stat=None,
                has_access=False,
                has_read=False,
                has_write=False,
            )
        to_resolve = path.expanduser() if expand_user else path
        resolved = to_resolve.resolve(strict=strict)
        is_link = stat.S_ISLNK(raw_stat.st_mode)
        # For a symlink, stat the resolved target separately; otherwise reuse the one result.
        real_stat = cls.__stat_raw(resolved) if is_link else raw_stat
        return PathInfo(
            source=path,
            resolved=resolved,
            as_of=as_of,
            real_stat=real_stat,
            # link_stat is only reported when the source really is a symlink
            link_stat=raw_stat if is_link else None,
            has_access=os.access(path, os.X_OK, follow_symlinks=True),
            has_read=os.access(path, os.R_OK, follow_symlinks=True),
            has_write=os.access(path, os.W_OK, follow_symlinks=True),
        )
248
249
    @classmethod
    def prep_dir(cls, path: PathLike, *, exist_ok: bool = True) -> bool:
        """
        Prepares a directory by making it if it doesn't exist.
        If exist_ok is False, calls logger.warning if it already exists.

        Args:
            path: The directory to create (along with any missing parents)
            exist_ok: If False, log a warning when the directory already exists

        Returns:
            Whether the path already existed before this call

        Raises:
            DirDoesNotExistError: If the path exists but is not a directory
        """
        path = Path(path)
        exists = path.exists()
        # On some platforms we get generic exceptions like permissions errors,
        # so these are better
        if exists and not path.is_dir():
            raise DirDoesNotExistError(f"Path {path} exists but is not a directory")
        if exists and not exist_ok:
            logger.warning("Directory %s already exists", path)
        if not exists:
            # NOTE! exist_ok in mkdir throws an error on Windows
            path.mkdir(parents=True)
        return exists
267
268
    @classmethod
    def prep_file(cls, path: PathLike, *, exist_ok: bool = True) -> None:
        """
        Prepares a file path by making its parent directory.
        Same as ``pathlib.Path.mkdir`` but makes sure ``path`` is a file if it exists.

        Args:
            path: The file path whose parent directory should be created
            exist_ok: Passed to ``mkdir`` for the parent directory

        Raises:
            FileDoesNotExistError: If the path exists but is neither a file nor a symlink
        """
        # On some platforms we get generic exceptions like permissions errors, so these are better
        path = Path(path)
        # Validate before touching the filesystem: don't make the dirs and then fail
        if path.exists():
            if not (path.is_file() or path.is_symlink()):
                raise FileDoesNotExistError(f"Path {path} exists but is not a file")
        parent_dir = Path(path.parent)
        parent_dir.mkdir(parents=True, exist_ok=exist_ok)
280
281
    @classmethod
    def get_env_info(cls, *, include_insecure: bool = False) -> Mapping[str, str]:
        """
        Get a dictionary of some system and environment information.
        Includes os_release, hostname, username, mem + disk, shell, etc.

        Args:
            include_insecure: Include data like hostname and username

        Returns:
            A dict mapping each item's name to ``str()`` of its value.
            Disk and memory figures are only included if ``psutil`` is importable.

        .. caution ::
            Even with ``include_insecure=False``, avoid exposing this data to untrusted
            sources. For example, this includes the specific OS release, which could
            be used in attack.
        """
        try:
            import psutil
        except ImportError:
            psutil = None
            logger.warning("psutil is not installed, so cannot get extended env info")

        now = datetime.now(timezone.utc).astimezone().isoformat()
        uname = platform.uname()
        language_code, encoding = locale.getlocale()
        data = {}

        def _try(os_fn, k: str, *args):
            # Calls os_fn(*args) and records the result under k; skipped (returning None)
            # if any argument is None or the call is unavailable on this platform.
            if any(a is None for a in args):
                return None
            try:
                v = os_fn(*args)
            except (OSError, ImportError, KeyError):
                # KeyError: getpass.getuser() can raise it before Python 3.13
                # when the uid has no password-database entry
                return None
            data[k] = v
            return v

        data.update(
            platform=platform.platform(),
            python=".".join(str(i) for i in sys.version_info),
            os=uname.system,
            os_release=uname.release,
            os_version=uname.version,
            machine=uname.machine,
            byte_order=sys.byteorder,
            processor=uname.processor,
            build=sys.version,
            python_bits=8 * struct.calcsize("P"),
            environment_info_capture_datetime=now,
            encoding=encoding,
            # the language code from getlocale(), not the ``locale`` module object
            locale=language_code,
            recursion_limit=sys.getrecursionlimit(),
            float_info=sys.float_info,
            int_info=sys.int_info,
            flags=sys.flags,
            hash_info=sys.hash_info,
            implementation=sys.implementation,
            switch_interval=sys.getswitchinterval(),
            filesystem_encoding=sys.getfilesystemencoding(),
        )
        for env_var, key in (("LANG", "lang"), ("SHELL", "shell"), ("LC_ALL", "lc_all")):
            if env_var in os.environ:
                data[key] = os.environ[env_var]
        if hasattr(sys, "getwindowsversion"):
            data["win_ver"] = sys.getwindowsversion()
        # mac_ver lives in ``platform``, not ``sys``
        if sys.platform == "darwin":
            data["mac_ver"] = platform.mac_ver()
        # ``linux_distribution`` never existed in ``sys`` and was removed from ``platform``
        # in Python 3.8; freedesktop_os_release (3.10+) is the replacement and raises
        # OSError when no os-release file is found, which _try swallows.
        if hasattr(platform, "freedesktop_os_release"):
            _try(platform.freedesktop_os_release, "linux_distribution")
        if include_insecure:
            _try(getuser, "username")
            _try(os.getlogin, "login")
            _try(socket.gethostname, "hostname")
            _try(os.getcwd, "cwd")
            pid = _try(os.getpid, "pid")
            ppid = _try(os.getppid, "parent_pid")
            if hasattr(os, "getpriority"):
                _try(os.getpriority, "priority", os.PRIO_PROCESS, pid)
                _try(os.getpriority, "parent_priority", os.PRIO_PROCESS, ppid)
        if psutil is not None:
            # query each once so the used/free pairs come from the same snapshot
            disk = psutil.disk_usage(".")
            memory = psutil.virtual_memory()
            data.update(
                disk_used=disk.used,
                disk_free=disk.free,
                memory_used=memory.used,
                memory_available=memory.available,
            )
        return {k: str(v) for k, v in data.items()}
374
375
    @classmethod
376
    def list_package_versions(cls) -> Mapping[str, str]:
377
        """
378
        Returns installed packages and their version numbers.
379
        Reliable; uses importlib (Python 3.8+).
380
        """
381
        # calling .metadata reads the metadata file
382
        # and .version is an alias to .metadata["version"]
383
        # so make sure to only read once
384
        # TODO: get installed extras?
0 ignored issues
show
Coding Style introduced by
TODO and FIXME comments should generally be avoided.
Loading history...
385
        dct = {}
386
        for d in importlib.metadata.distributions():
0 ignored issues
show
Bug introduced by
The Module importlib does not seem to have a member named metadata.

This check looks for calls to members that are non-existent. These calls will fail.

The member could have been renamed or removed.

Loading history...
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
387
            meta = d.metadata
388
            dct[meta["name"]] = meta["version"]
389
        return dct
390
391
    @classmethod
    def delete_surefire(cls, path: PathLike) -> Optional[Exception]:
        """
        Deletes files or directories cross-platform, but working around multiple issues in Windows.

        Args:
            path: The file or directory to delete

        Returns:
            None, or an Exception for minor warnings
            (specifically, whatever ``os.chmod`` raised before the deletion)

        Raises:
            IOError: If it can't delete
        """
        # we need this because of Windows
        path = Path(path)
        logger.debug(f"Permanently deleting {path} ...")
        # The chmod failure is returned rather than raised:
        # we don't want to interrupt the current line being printed like in slow_delete
        try:
            os.chmod(str(path), stat.S_IRWXU)
        except Exception as err:
            chmod_err = err
        else:
            chmod_err = None
        if not path.is_dir():
            path.unlink(missing_ok=True)
        else:
            # ignore_errors because of Windows
            shutil.rmtree(str(path), ignore_errors=True)
            try:
                # again, because of Windows
                path.unlink(missing_ok=True)
            except IOError:
                # almost definitely because it doesn't exist
                pass
        logger.debug(f"Permanently deleted {path}")
        return chmod_err
422
423
    @classmethod
    def trash(cls, path: PathLike, trash_dir: Optional[PathLike] = None) -> None:
        """
        Trash a file or directory.

        Args:
            path: The path to move to the trash
            trash_dir: If None, uses :meth:`pocketutils.tools.path_tools.PathTools.guess_trash`
        """
        trash_dir = PathTools.guess_trash() if trash_dir is None else trash_dir
        logger.debug(f"Trashing {path} to {trash_dir} ...")
        source, destination = str(path), str(trash_dir)
        shutil.move(source, destination)
        logger.debug(f"Trashed {path} to {trash_dir}")
0 ignored issues
show
introduced by
Use lazy % formatting in logging functions
Loading history...
437
438
    @classmethod
439
    def try_cleanup(cls, path: Path, *, bound: Type[Exception] = PermissionError) -> None:
440
        """
441
        Try to delete a file (probably temp file), if it exists, and log any PermissionError.
442
        """
443
        path = Path(path)
444
        # noinspection PyBroadException
445
        try:
446
            path.unlink(missing_ok=True)
447
        except bound:
448
            logger.error(f"Permission error preventing deleting {path}")
0 ignored issues
show
introduced by
Use lazy % formatting in logging functions
Loading history...
449
450
    @classmethod
    def read_lines_file(cls, path: PathLike, *, ignore_comments: bool = False) -> Sequence[str]:
        """
        Returns a list of lines in the file.
        Optionally skips lines starting with '#' or that only contain whitespace.

        Args:
            path: The file to read
            ignore_comments: If True, drop blank lines and lines that start with '#'

        Returns:
            The lines, each with surrounding whitespace stripped
        """
        with cls.open_file(path, "r") as handle:
            stripped = [raw.strip() for raw in handle.readlines()]
        if not ignore_comments:
            # every line is kept (blank ones included), only stripped
            return stripped
        return [text for text in stripped if len(text) != 0 and not text.startswith("#")]
463
464
    @classmethod
    def read_properties_file(cls, path: PathLike) -> Mapping[str, str]:
        """
        Reads a .properties file.
        A list of lines with key=value pairs (with an equals sign).
        Lines beginning with # are ignored.
        Each line must contain exactly 1 equals sign.

        .. caution::
            The escaping is not compliant with the standard

        Args:
            path: Read the file at this local path

        Returns:
            A dict mapping keys to values, both with surrounding whitespace stripped

        Raises:
            ParsingError: If a non-comment, non-blank line does not contain exactly one '='
            AlreadyUsedError: If the same key appears more than once
        """
        dct = {}
        with cls.open_file(path, "r") as f:
            # Line numbers in error messages are 1-based, matching what editors show
            for line_no, line in enumerate(f.readlines(), start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                if line.count("=") != 1:
                    raise ParsingError(f"Bad line {line_no} in {path}", resource=path)
                k, _, v = line.partition("=")
                k, v = k.strip(), v.strip()
                if k in dct:
                    raise AlreadyUsedError(f"Duplicate property {k} (line {line_no})", key=k)
                dct[k] = v
        return dct
495
496
    @classmethod
    def write_properties_file(
        cls, properties: Mapping[Any, Any], path: Union[str, PurePath], mode: str = "o"
    ) -> None:
        """
        Writes a .properties file.
        Keys and values are converted with ``str()``; any ``=`` is written as ``--``
        and any newline as a literal ``\\n``. A warning is logged if anything was escaped.

        .. caution::
            The escaping is not compliant with the standard

        Args:
            properties: The key-value pairs to write, one ``key=value`` line each
            path: Write the file at this local path
            mode: An ``OpenMode`` string; must permit writing

        Raises:
            ContradictoryRequestError: If ``mode`` does not permit writing
        """
        if not OpenMode(mode).write:
            raise ContradictoryRequestError(f"Cannot write text to {path} in mode {mode}")

        def _escape(text: str) -> str:
            return text.replace("=", "--").replace("\n", "\\n")

        with cls.open_file(path, mode) as f:
            bads = []
            for k, v in properties.items():
                # Convert first: keys/values may be any type, and ``"=" in 5`` raises TypeError
                key, value = str(k), str(v)
                if "=" in key or "=" in value or "\n" in key or "\n" in value:
                    bads.append(key)
                f.write(_escape(key) + "=" + _escape(value) + "\n")
            if 0 < len(bads) <= 10:
                logger.warning(
                    "At least one properties entry contains an equals sign or newline (\\n)."
                    " These were escaped: %s",
                    ", ".join(bads),
                )
            elif len(bads) > 0:
                # too many to list individually
                logger.warning(
                    "At least one properties entry contains an equals sign or newline (\\n),"
                    " which were escaped."
                )
529
530
    @classmethod
    def save_json(cls, data: Any, path: PathLike, mode: str = "w") -> None:
        """
        Serializes ``data`` with orjson and writes the result as text via ``open_file``.

        Args:
            data: The object to serialize
            path: The file to write to
            mode: The open mode passed on to ``open_file``
        """
        with cls.open_file(path, mode) as out:
            # orjson produces bytes; decode because the handle is written as text
            encoded = orjson.dumps(data)
            out.write(encoded.decode(encoding="utf8"))
534
535
    @classmethod
    def load_json(cls, path: PathLike) -> Union[dict, list]:
        """
        Reads the file at ``path`` as UTF-8 text and parses it with orjson.

        Returns:
            The parsed JSON document
        """
        text = Path(path).read_text(encoding="utf8")
        return orjson.loads(text)
538
539
    @classmethod
    def read_any(
        cls, path: PathLike
    ) -> Union[
        str,
        bytes,
        Sequence[str],
        pd.DataFrame,
        Sequence[int],
        Sequence[float],
        Sequence[str],
        Mapping[str, str],
    ]:
        """
        Reads a variety of simple formats based on filename extension.
        Includes '.txt', '.csv', '.xml', '.properties', '.json', '.npy', and '.npz'.
        Also reads '.data' (binary), '.lines' (text lines).
        And formatted lists: '.strings', '.floats', and '.ints' (ex: "[1, 2, 3]").

        Only the final suffix of the filename is considered.
        For the formatted lists, only the first line of the file is parsed.

        Returns:
            The parsed content; the type depends on the extension.
            For '.xml', this is the root element of the parsed tree.

        Raises:
            TypeError: If the extension is not recognized
        """
        path = Path(path)
        ext = path.suffix.lstrip(".")

        def load_list(dtype):
            # Strip spaces and brackets from the first line, then split on commas
            first_line = FilesysTools.read_lines_file(path)[0]
            cleaned = first_line.replace(" ", "").replace("[", "").replace("]", "")
            return [dtype(s) for s in cleaned.split(",")]

        if ext == "lines":
            return cls.read_lines_file(path)
        if ext == "txt":
            return path.read_text(encoding="utf-8")
        if ext == "data":
            return path.read_bytes()
        if ext == "json":
            return cls.load_json(path)
        if ext in ["npy", "npz"]:
            return np.load(str(path), allow_pickle=False, encoding="utf8")
        if ext == "properties":
            return cls.read_properties_file(path)
        if ext == "csv":
            return pd.read_csv(path, encoding="utf8")
        if ext == "ints":
            return load_list(int)
        if ext == "floats":
            return load_list(float)
        if ext == "strings":
            return load_list(str)
        if ext == "xml":
            # The parsed root was previously discarded, so callers received None
            return ElementTree.parse(path).getroot()
        raise TypeError(f"Did not recognize resource file type for file {path}")
595
596
    @classmethod
    @contextmanager
    def open_file(cls, path: PathLike, mode: Union[OpenMode, str], *, mkdir: bool = False):
        """
        Opens a file, using utf8 for text modes, optionally gzipped.
        The handle is closed when the ``with`` block exits.

        Args:
            path: The file to open
            mode: An ``OpenMode`` or a string accepted by it
            mkdir: If True and the mode is a write mode, create the parent directory first

        Yields:
            The open file handle

        See Also:
            :class:`pocketutils.core.input_output.OpenMode`
        """
        path = Path(path)
        mode = OpenMode(mode)
        if mode.write and mkdir:
            path.parent.mkdir(exist_ok=True, parents=True)
        if not mode.read:
            cls.prep_file(path, exist_ok=mode.overwrite or mode.append)
        # Binary streams take no encoding; passing one makes open() raise ValueError
        text_kwargs = {} if mode.binary else {"encoding": "utf8"}
        if mode.gzipped:
            internal = mode.internal
            if not mode.binary and "t" not in internal:
                # gzip.open defaults to binary unless 't' is given,
                # and it rejects ``encoding`` in binary mode
                internal += "t"
            handle = gzip.open(path, internal, compresslevel=COMPRESS_LEVEL, **text_kwargs)
        else:
            handle = open(path, mode.internal, **text_kwargs)
        # Use the handle as a context manager so it is closed even if the caller raises
        with handle as f:
            yield f
617
618
    @classmethod
    def write_lines(cls, iterable: Iterable[Any], path: PathLike, mode: str = "w") -> int:
        """
        Just writes an iterable line-by-line to a file, using '\\n'.
        Makes the parent directory if needed.
        Checks that the iterable is a "true iterable" (not a string or bytes).

        Args:
            iterable: The items to write; each is converted with ``str``
            path: The file to write to
            mode: The open mode passed on to ``open_file``

        Returns:
            The number of lines written (the same as len(iterable) if iterable has a length)

        Raises:
            TypeError: If ``iterable`` is not a true iterable
            FileExistsError: If the path exists and append is False
            PathIsNotFileError: If append is True, and the path exists but is not a file
        """
        if not cls.is_true_iterable(iterable):
            raise TypeError("Not a true iterable")  # TODO include iterable if small
        n = 0
        # mkdir=True honors the documented promise to create the parent directory
        with cls.open_file(path, mode, mkdir=True) as f:
            for x in iterable:
                f.write(str(x) + "\n")
                # Count inside the loop; counting after it always reported 1
                n += 1
        return n
640
641
    @classmethod
642
    def hash_hex(cls, x: SupportsBytes, algorithm: str) -> str:
0 ignored issues
show
Coding Style Naming introduced by
Argument name "x" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
643
        """
644
        Returns the hex-encoded hash of the object (converted to bytes).
645
        """
646
        m = hashlib.new(algorithm)
0 ignored issues
show
Coding Style Naming introduced by
Variable name "m" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
647
        m.update(bytes(x))
648
        return m.hexdigest()
649
650
    @classmethod
    def replace_in_file(cls, path: PathLike, changes: Mapping[str, str]) -> None:
        """
        Applies regex substitutions to a file's text and OVERWRITES the file with the result.

        Each key of ``changes`` is a pattern and each value is its replacement.
        They are applied one after another, in the mapping's iteration order,
        using ``regex.sub`` with the V1, MULTILINE, and DOTALL flags.
        The file is read and written as UTF-8.
        """
        target = Path(path)
        flags = regex.V1 | regex.MULTILINE | regex.DOTALL
        text = target.read_text(encoding="utf-8")
        for pattern, replacement in changes.items():
            text = regex.sub(pattern, replacement, text, flags=flags)
        target.write_text(text, encoding="utf-8")
660
661
    @classmethod
    def tmppath(cls, path: Optional[PathLike] = None, **kwargs) -> Generator[Path, None, None]:
        """
        Makes a temporary Path. Won't create ``path`` but will delete it at the end.
        If ``path`` is None, will use ``tempfile.mkstemp`` (which does create the file).

        This is a generator that yields exactly one Path.
        The path is removed when the generator finishes or is closed;
        it is not an error if nothing exists at the path by then.

        Args:
            path: The path to yield, or None to make one with ``tempfile.mkstemp``
            kwargs: Passed to the ``Path`` constructor

        Yields:
            The temporary path
        """
        if path is None:
            fd, path = tempfile.mkstemp()
            # mkstemp hands back an open OS-level descriptor; close it so it is not leaked
            os.close(fd)
        try:
            yield Path(path, **kwargs)
        finally:
            # The caller may never have created the file, so tolerate its absence
            Path(path).unlink(missing_ok=True)
673
674
    @classmethod
    def tmpfile(
        cls, path: Optional[PathLike] = None, *, spooled: bool = False, **kwargs
    ) -> Generator[Writeable, None, None]:
        """
        Simple wrapper around tempfile functions.
        Wraps ``TemporaryFile``, ``NamedTemporaryFile``, and ``SpooledTemporaryFile``.

        This is a generator that yields exactly one open file object,
        which is closed when the generator finishes or is closed.

        Args:
            path: If given (and ``spooled`` is False), use ``NamedTemporaryFile``.
                  ``NamedTemporaryFile`` cannot create a file at an exact path,
                  so the parent of ``path`` is used as ``dir`` and its name as ``prefix``,
                  unless those are supplied in ``kwargs``.
            spooled: If True, use ``SpooledTemporaryFile`` and ignore ``path``
            kwargs: Passed to the underlying tempfile function

        Yields:
            The open temporary file
        """
        if spooled:
            with tempfile.SpooledTemporaryFile(**kwargs) as handle:
                yield handle
        elif path is None:
            with tempfile.TemporaryFile(**kwargs) as handle:
                yield handle
        else:
            # The first positional argument of NamedTemporaryFile is ``mode``,
            # so the path must be passed through keyword arguments instead.
            target = Path(path)
            named_kwargs = dict(kwargs)
            named_kwargs.setdefault("dir", str(target.parent))
            named_kwargs.setdefault("prefix", target.name)
            with tempfile.NamedTemporaryFile(**named_kwargs) as handle:
                yield handle
691
692
    @classmethod
693
    def tmpdir(cls, **kwargs) -> Generator[Path, None, None]:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
694
        with tempfile.TemporaryDirectory(**kwargs) as x:
0 ignored issues
show
Coding Style Naming introduced by
Variable name "x" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
695
            yield Path(x)
696
697
    @classmethod
698
    def __stat_raw(cls, path: Path) -> Optional[os.stat_result]:
699
        try:
700
            return path.lstat()
701
        except OSError as e:
0 ignored issues
show
Coding Style Naming introduced by
Variable name "e" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
702
            if hasattr(pathlib, "_ignore_error") and not pathlib._ignore_error(e):
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _ignore_error was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
Bug introduced by
The Module pathlib does not seem to have a member named _ignore_error.

This check looks for calls to members that are non-existent. These calls will fail.

The member could have been renamed or removed.

Loading history...
703
                raise
704
        return None
705
706
707
# Names exported by a wildcard import of this module.
__all__ = ["FilesysTools", "PathInfo"]
708