Passed
Push — main ( da77b5...65730f )
by Douglas
02:28
created

ArgUtils.parse_taxon()   B

Complexity

Conditions 8

Size

Total Lines 12
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 12
nop 4
dl 0
loc 12
rs 7.3333
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
import os
4
from dataclasses import dataclass
5
from pathlib import Path
6
from typing import (
7
    AbstractSet,
8
    Any,
9
    Callable,
10
    Iterable,
11
    Mapping,
12
    Optional,
13
    Sequence,
14
    Set,
15
    Tuple,
16
    TypeVar,
17
    Union,
18
)
19
20
from pocketutils.core.exceptions import PathExistsError, XTypeError, XValueError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
21
from pocketutils.misc.typer_utils import Arg, Opt
0 ignored issues
show
introduced by
Unable to import 'pocketutils.misc.typer_utils'
Loading history...
22
from pocketutils.tools.path_tools import PathTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.path_tools'
Loading history...
23
from regex import regex
0 ignored issues
show
introduced by
Unable to import 'regex'
Loading history...
24
from typeddfs.df_errors import FilenameSuffixError
0 ignored issues
show
introduced by
Unable to import 'typeddfs.df_errors'
Loading history...
25
26
from mandos import logger
27
from mandos.model.apis.chembl_support.chembl_targets import TargetType
28
from mandos.model.apis.pubchem_support.pubchem_models import ClinicalTrialsGovUtils
29
from mandos.model.settings import SETTINGS, Globals
30
from mandos.model.taxonomy import Taxonomy
31
from mandos.model.taxonomy_caches import TaxonomyFactories
32
33
# Covariant generic type variable. Nothing in the visible part of this module
# references it; it may be kept for importers -- TODO confirm before removing.
T = TypeVar("T", covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "T" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
34
35
36
@dataclass(frozen=True, repr=True, order=True)
class ParsedTaxa:
    """
    Immutable result of parsing a taxa specification string.

    Instances are built by ``ArgUtils.parse_taxa``; the ``allow``, ``forbid``
    and ``ancestors`` fields are handed to
    ``TaxonomyFactories.get_smart_taxonomy`` by ``ArgUtils.get_taxonomy``.

    Attributes:
        source: The taxa text the ``allow``/``forbid`` entries were read from.
        allow: Taxon IDs or names that were listed without a leading ``-``.
        forbid: Taxon IDs or names that were listed with a leading ``-``.
        ancestors: Taxon IDs or names given as ancestors.
    """

    source: str
    allow: Sequence[Union[int, str]]
    forbid: Sequence[Union[int, str]]
    ancestors: Sequence[Union[int, str]]

    @classmethod
    def empty(cls) -> ParsedTaxa:
        """
        Build an instance with an empty source and no taxa.

        Returns:
            A new instance of ``cls`` (so subclasses get their own type) whose
            three sequences are fresh, independent empty lists.
        """
        return cls("", [], [], [])
46
47
48
class ArgUtils:
    """
    Helpers for formatting help text and for parsing argument values
    (taxa, clinical-trial statuses, and target types).
    """

    @classmethod
    def definition_bullets(cls, dct: Mapping[Any, Any], colon: str = ": ", indent: int = 12) -> str:
        """
        Format a mapping as a bulleted list with one `` - key<colon>value`` per item.

        Args:
            dct: The items to list, in iteration order.
            colon: Text placed between each key and its value.
            indent: Number of spaces inserted before every entry except the first.

        Returns:
            The entries joined by two line separators followed by the indent.
            The first entry is not indented.
        """
        separator = os.linesep * 2 + " " * indent
        return separator.join(f" - {key}{colon}{value}" for key, value in dct.items())

    @classmethod
    def definition_list(cls, dct: Mapping[Any, Any], colon: str = ": ", sep: str = "; ") -> str:
        """
        Format a mapping on one line as ``key<colon>value`` entries joined by ``sep``.

        Args:
            dct: The items to list, in iteration order.
            colon: Text placed between each key and its value.
            sep: Text placed between entries.

        Returns:
            The joined text (an empty string for an empty mapping).
        """
        return sep.join(f"{key}{colon}{value}" for key, value in dct.items())

    @classmethod
    def list(
        cls,
        lst: Iterable[Any],
        *,
        attr: Union[None, str, Callable[[Any], Any]] = None,
        sep: str = ", ",
    ) -> str:
        """
        Join the items of an iterable into one string.

        Args:
            lst: The items to join.
            attr: How to turn an item into text.
                ``None`` uses ``item.name`` if the item has a ``name`` attribute
                and ``str(item)`` otherwise; a string names an attribute to read;
                a callable is applied to the item.
            sep: Text placed between items.

        Returns:
            The joined text. Every piece is passed through ``str``, including
            ``item.name``, so non-string names no longer break the join.
        """

        def render(item: Any) -> str:
            if attr is None:
                return str(item.name) if hasattr(item, "name") else str(item)
            if isinstance(attr, str):
                return str(getattr(item, attr))
            return str(attr(item))

        return sep.join(render(item) for item in lst)

    @classmethod
    def get_taxonomy(
        cls,
        taxa: Optional[str],
        *,
        local_only: bool = False,
        allow_forbid: bool = True,
    ) -> Optional[Taxonomy]:
        """
        Parse a taxa specification and request the matching taxonomy.

        Args:
            taxa: The specification; see :meth:`parse_taxa` for the syntax.
            local_only: Forwarded unchanged to ``TaxonomyFactories.get_smart_taxonomy``.
            allow_forbid: Whether ``-`` (forbidden) entries are permitted.

        Returns:
            ``None`` if ``taxa`` is ``None`` or empty; otherwise the result of
            ``TaxonomyFactories.get_smart_taxonomy`` for the parsed values.

        Raises:
            XValueError: If ``-`` entries are present but not permitted.
            XTypeError: If an entry cannot be parsed (see :meth:`parse_taxon`).
        """
        if not taxa:
            return None
        parsed = cls.parse_taxa(taxa, allow_forbid=allow_forbid)
        return TaxonomyFactories.get_smart_taxonomy(
            allow=parsed.allow,
            forbid=parsed.forbid,
            ancestors=parsed.ancestors,
            local_only=local_only,
        )

    @classmethod
    def parse_taxa(
        cls,
        taxa: Optional[str],
        *,
        allow_forbid: bool = True,
    ) -> ParsedTaxa:
        """
        Parse a specification of the form ``taxon,taxon,...[:ancestor,ancestor,...]``.

        Entries are comma-separated and whitespace around them is ignored.
        An entry starting with ``-`` is forbidden; any other entry is allowed
        (leading ``+`` characters are dropped). If there is no ``:``, the ancestors
        default to ``Globals.cellular_taxon`` and ``Globals.viral_taxon``.
        Empty entries are skipped, in the ancestor list as well as the taxa list.

        Args:
            taxa: The specification, or ``None``.
            allow_forbid: Whether ``-`` entries are permitted.

        Returns:
            The parsed taxa; ``ParsedTaxa.empty()`` if ``taxa`` is ``None`` or empty.
            ``source`` holds only the text before the first ``:``.
            Allowed and forbidden entries may be IDs or names; ancestors must be IDs.

        Raises:
            XValueError: If ``-`` entries are present but ``allow_forbid`` is false.
            XTypeError: If an entry cannot be parsed (see :meth:`parse_taxon`).
        """
        if taxa is None or taxa == "":
            return ParsedTaxa.empty()
        taxa, colon, ancestor_text = taxa.partition(":")
        if not colon:
            ancestor_text = f"{Globals.cellular_taxon},{Globals.viral_taxon}"
        entries = [t.strip() for t in taxa.split(",") if t.strip()]
        allow = [t.lstrip("+").strip() for t in entries if not t.startswith("-")]
        forbid = [t.lstrip("-").strip() for t in entries if t.startswith("-")]
        ancestors = [t.strip() for t in ancestor_text.split(",") if t.strip()]
        if not allow_forbid and forbid:
            raise XValueError(f"Cannot use '-' in {taxa}")
        return ParsedTaxa(
            source=taxa,
            allow=[cls.parse_taxon(t, id_only=False) for t in allow],
            forbid=[cls.parse_taxon(t, id_only=False) for t in forbid],
            ancestors=[cls.parse_taxon(t, id_only=True) for t in ancestors],
        )

    @classmethod
    def parse_taxa_ids(cls, taxa: Optional[str]) -> Sequence[int]:
        """
        Parse a comma-separated list of taxon IDs.

        Does not allow negatives.

        Args:
            taxa: The comma-separated text, or ``None``. Empty entries are skipped.

        Returns:
            The IDs as integers; an empty list if ``taxa`` is ``None`` or empty.

        Raises:
            XTypeError: If an entry is not an ID (see :meth:`parse_taxon`).
        """
        if taxa is None or taxa == "":
            return []
        entries = [t.strip() for t in taxa.split(",") if t.strip()]
        return [cls.parse_taxon(t, id_only=True) for t in entries]

    @classmethod
    def parse_taxon(cls, taxon: Union[int, str], *, id_only: bool = False) -> Union[int, str]:
        """
        Parse a single taxon given as an ID, a name, or a standard alias.

        The previous implementation tested ``taxon in std``, a substring check
        that was true exactly when *no* alias matched. As a result aliases were
        never resolved and ``id_only`` was never enforced. This version looks
        the alias up first and then applies the ID/name rules to the result.

        Args:
            taxon: An integer ID, or a string holding an ID, a name, or an alias
                known to :meth:`_get_std_taxon`.
            id_only: If true, only values that are (or resolve to) a single
                numeric ID are accepted.

        Returns:
            An integer passed in is returned unchanged. A string is first
            resolved through the alias table; with ``id_only`` the resolved
            text is returned as an ``int``, otherwise as a ``str``.

        Raises:
            XTypeError: If ``id_only`` is set and the value is not a numeric ID,
                or if the value is neither an ``int`` nor a ``str``.
        """
        # bool is a subclass of int but is never a meaningful taxon ID.
        if isinstance(taxon, int) and not isinstance(taxon, bool):
            return taxon
        if isinstance(taxon, str):
            # Every alias is alphabetic, so numeric text can skip the lookup.
            # isdecimal() (unlike isdigit()) only accepts text that int() can parse.
            resolved = taxon if taxon.isdecimal() else cls._get_std_taxon(taxon)
            if not id_only:
                return resolved
            if resolved.isdecimal():
                return int(resolved)
        if id_only:
            raise XTypeError(f"Taxon {taxon} must be an ID")
        raise XTypeError(f"Taxon {taxon} must be an ID or name")

    @classmethod
    def _get_std_taxon(cls, taxa: str) -> str:
        """
        Resolve a standard alias (e.g. ``vertebrates``, ``cells``, ``viruses``, ``all``).

        Args:
            taxa: The text to look up (matched exactly, case-sensitively).

        Returns:
            ``str()`` of the corresponding ``Globals`` value if ``taxa`` is an alias
            (for ``all``, the cellular and viral values joined by a comma);
            otherwise ``taxa`` unchanged.
        """
        aliases = {
            "vertebrata": Globals.vertebrata,
            "vertebrate": Globals.vertebrata,
            "vertebrates": Globals.vertebrata,
            "cellular": Globals.cellular_taxon,
            "cell": Globals.cellular_taxon,
            "cells": Globals.cellular_taxon,
            "viral": Globals.viral_taxon,
            "virus": Globals.viral_taxon,
            "viruses": Globals.viral_taxon,
            "all": f"{Globals.cellular_taxon},{Globals.viral_taxon}",
        }
        found = aliases.get(taxa)
        return taxa if found is None else str(found)

    @staticmethod
    def get_trial_statuses(st: str) -> Set[str]:
        """
        Resolve clinical-trial statuses from text.

        Delegates to ``ClinicalTrialsGovUtils.resolve_statuses`` and returns its result.
        """
        return ClinicalTrialsGovUtils.resolve_statuses(st)

    @staticmethod
    def get_target_types(st: str) -> Set[str]:
        """
        Resolve target types from text.

        Returns:
            The ``name`` of every item yielded by ``TargetType.resolve(st)``.
        """
        return {target_type.name for target_type in TargetType.resolve(st)}
170
171
172
class EntryUtils:
    """
    Helpers that turn user-supplied output locations into concrete paths.
    """

    @classmethod
    def adjust_filename(
        cls,
        to: Union[None, str, Path],
        default: Union[str, Path],
        replace: bool,
        *,
        suffixes: Union[None, AbstractSet[str], Callable[[Union[Path, str]], Any]] = None,
    ) -> Path:
        """
        Work out the output file path from an optional user-supplied location.

        ``to`` is interpreted as follows:

        - ``None``: use ``default``.
        - A single component starting with ``.`` (e.g. ``.tsv``) or ``*.``
          (e.g. ``*.tsv``): use ``default`` with its suffix replaced.
        - An existing directory, or a path without a suffix: ``to / default``.
        - Anything else: ``to`` itself.

        Multi-component paths such as ``../x.csv`` are never treated as a suffix
        (previously they crashed in ``Path.with_suffix``).

        Args:
            to: The user-supplied location, or ``None``.
            default: The path to fall back on or to combine with ``to``.
            replace: Whether an existing file may be overwritten.
            suffixes: Allowed suffixes (a set), or a callable that raises
                ``FilenameSuffixError`` for an unsupported suffix; ``None`` skips the check.

        Returns:
            The resolved path. On Windows, if ``SETTINGS.sanitize_paths`` is set,
            the path is passed through ``PathTools.sanitize_nodes`` first.

        Raises:
            PathExistsError: If the path exists and is not a file, socket, or
                character device, or if it exists and ``replace`` is false.
            XValueError: If the suffix is not supported.
        """
        if to is None:
            path = Path(default)
        else:
            to = Path(to)
            spec = str(to)
            # Only a lone component can be suffix shorthand; "../x.csv" is a real path.
            shorthand = len(to.parts) == 1 and spec != ".."
            if shorthand and spec.startswith("."):
                path = Path(default).with_suffix(spec)
            elif shorthand and spec.startswith("*."):
                path = Path(default).with_suffix(spec[1:])
            elif to.is_dir() or to.suffix == "":
                path = to / default
            else:
                path = to
        if os.name == "nt" and SETTINGS.sanitize_paths:
            # Use the public ``parts``; the private ``_parts`` is not available
            # on newer Python versions.
            new_path = Path(*PathTools.sanitize_nodes(path.parts, is_file=True))
            if new_path.resolve() != path.resolve():
                logger.warning(f"Sanitized filename {path} → {new_path}")
                path = new_path
        exists = path.exists()
        is_file_like = path.is_file() or path.is_socket() or path.is_char_device()
        if exists and not is_file_like:
            raise PathExistsError(f"Path {path} exists and is not a file")
        if exists and not replace:
            raise PathExistsError(f"File {path} already exists")
        cls._check_suffix(path.suffix, suffixes)
        if exists:
            logger.info(f"Overwriting existing file {path}")
        return path

    @classmethod
    def adjust_dir_name(
        cls,
        to: Union[None, str, Path],
        default: Union[str, Path],
        *,
        suffixes: Union[None, AbstractSet[str], Callable[[Union[Path, str]], Any]] = None,
    ) -> Tuple[Path, str]:
        """
        Work out an output directory and file suffix from an optional specification.

        ``to`` may be ``<dir>``, ``*<.suffix>``, or ``<dir>*<.suffix>``.
        A missing directory part falls back to ``default``; a missing suffix
        part falls back to ``SETTINGS.table_suffix``.

        The previous implementation required the ``*<.suffix>`` part, so a plain
        directory failed with ``AttributeError``. It also could not handle
        ``Path`` values for ``to`` or ``default``.

        Args:
            to: The user-supplied specification, or ``None``.
            default: The directory to use when none is given.
            suffixes: Allowed suffixes (a set), or a callable that raises
                ``FilenameSuffixError`` for an unsupported suffix; ``None`` skips the check.

        Returns:
            ``(directory, suffix)``. On Windows, if ``SETTINGS.sanitize_paths``
            is set, the directory is passed through ``PathTools.sanitize_nodes`` first.

        Raises:
            XValueError: If ``to`` has a ``*`` that is not followed by ``.`` and
                at least one more character, or if the suffix is not supported.
            PathExistsError: If the directory path exists but is not a directory.
        """
        out_dir = Path(default)
        suffix = None
        if to is not None:
            dir_text, star, suffix_text = str(to).partition("*")
            if star:
                if not suffix_text.startswith(".") or len(suffix_text) < 2:
                    raise XValueError(
                        f"Cannot parse {to}; expected <dir>, *<.suffix>, or <dir>*<.suffix>"
                    )
                suffix = suffix_text
            chosen = str(default) if dir_text == "" else dir_text
            if chosen.startswith("."):
                logger.warning(f"Writing to {chosen} — was it meant as a suffix instead?")
            out_dir = Path(chosen)
        if suffix is None:
            suffix = SETTINGS.table_suffix
        if os.name == "nt" and SETTINGS.sanitize_paths:
            # Use the public ``parts``; the private ``_parts`` is not available
            # on newer Python versions.
            # NOTE(review): is_file=True is kept from the original although this
            # is a directory -- confirm against PathTools.sanitize_nodes.
            new_dir = Path(*PathTools.sanitize_nodes(out_dir.parts, is_file=True))
            if new_dir.resolve() != out_dir.resolve():
                logger.warning(f"Sanitized directory {out_dir} → {new_dir}")
                out_dir = new_dir
        if out_dir.exists() and not out_dir.is_dir():
            raise PathExistsError(f"Path {out_dir} already exists and is not a directory")
        cls._check_suffix(suffix, suffixes)
        # any() stops at the first entry instead of listing the whole directory.
        if out_dir.exists() and any(out_dir.iterdir()):
            logger.debug(f"Directory {out_dir} is non-empty")
        return out_dir, suffix

    @classmethod
    def _check_suffix(cls, suffix, suffixes):
        """
        Verify that ``suffix`` is supported.

        Args:
            suffix: The suffix to check.
            suffixes: ``None`` (no check), a callable that raises
                ``FilenameSuffixError`` for an unsupported suffix, or a
                container of allowed suffixes.

        Raises:
            XValueError: If the suffix is rejected; when a callable rejected it,
                the original ``FilenameSuffixError`` is chained as the cause.
        """
        if suffixes is None:
            return
        if callable(suffixes):
            try:
                suffixes(suffix)  # only called for its validation side effect
            except FilenameSuffixError as e:
                raise XValueError(f"Unsupported file format {suffix}") from e
        elif suffix not in suffixes:
            raise XValueError(f"Unsupported file format {suffix}")
254
255
256
# Public API of this module. ``Arg`` and ``Opt`` are not defined here; they are
# imported from ``pocketutils.misc.typer_utils`` and re-exported.
__all__ = ["Arg", "Opt", "ArgUtils", "EntryUtils"]
257