1
|
|
|
from pathlib import Path |
|
|
|
|
2
|
|
|
from typing import Set, Sequence, Optional, Union, Tuple |
3
|
|
|
|
4
|
|
|
from pocketutils.core.exceptions import PathExistsError |
|
|
|
|
5
|
|
|
from regex import regex |
|
|
|
|
6
|
|
|
from typeddfs import FileFormat |
|
|
|
|
7
|
|
|
|
8
|
|
|
from mandos.model.utils.setup import logger |
9
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
10
|
|
|
from mandos.model.apis.chembl_support.chembl_activity import DataValidityComment |
11
|
|
|
from mandos.model.apis.chembl_support.chembl_targets import TargetType |
12
|
|
|
from mandos.model.apis.pubchem_support.pubchem_models import ClinicalTrialsGovUtils |
13
|
|
|
from mandos.model.taxonomy import Taxonomy |
14
|
|
|
from mandos.model.taxonomy_caches import TaxonomyFactories |
15
|
|
|
|
16
|
|
|
# Table-file suffix that EntryUtils.adjust_dir_name falls back to when the caller does not supply one.
DEF_SUFFIX = MANDOS_SETTINGS.default_table_suffix |
17
|
|
|
|
18
|
|
|
|
19
|
|
|
class EntryUtils:
    """
    Helpers that normalize user-supplied output locations and option strings.

    The path helpers turn an optional ``to`` argument plus a default into a
    concrete output path; the remaining helpers split or resolve
    comma-separated option strings.
    """

    @staticmethod
    def _is_bare_suffix(text: str) -> bool:
        """Return whether ``text`` is a lone suffix like ``.csv`` rather than a path like ``../x.csv``."""
        # ``Path(text).name == text`` is false for anything with a directory part
        # (and for "."), so relative paths are no longer mistaken for suffixes.
        return text.startswith(".") and text != ".." and Path(text).name == text

    @classmethod
    def adjust_filename(
        cls, to: Optional[Union[str, Path]], default: Union[str, Path], replace: bool
    ) -> Path:
        """
        Resolve the output file path from ``to``, falling back to ``default``.

        Args:
            to: One of: ``None`` (use ``default``); a bare suffix such as ``.csv``
                or ``*.csv`` (use ``default`` with that suffix); an existing
                directory or a suffix-less path (write ``default`` inside it);
                or a full file path (used as-is).
            default: The default file name or path.
            replace: Whether an existing file at the resolved path may be overwritten.

        Returns:
            The resolved path.

        Raises:
            PathExistsError: If the resolved path exists and ``replace`` is false.
        """
        if to is None:
            path = Path(default)
        else:
            text = str(to)
            target = Path(to)
            if cls._is_bare_suffix(text):
                path = Path(default).with_suffix(text)
            elif text.startswith("*."):
                # drop the leading "*" so that only ".<suffix>" remains
                path = Path(default).with_suffix(text[1:])
            elif target.is_dir() or target.suffix == "":
                path = target / default
            else:
                path = target
        if path.exists():
            if not replace:
                raise PathExistsError(f"File {path} already exists")
            # only announce an overwrite when there is actually something to overwrite
            logger.info(f"Overwriting existing file {path}.")
        return path

    @classmethod
    def adjust_dir_name(
        cls, to: Optional[Union[str, Path]], default: Union[str, Path]
    ) -> Tuple[Path, str]:
        """
        Resolve an output directory and a table-file suffix from ``to``.

        Args:
            to: ``None``, or a string/path of the form ``<dir>``, ``*.<suffix>``,
                or ``<dir>*.<suffix>``. A missing directory part falls back to
                ``default``; a missing suffix part falls back to ``DEF_SUFFIX``.
            default: The default output directory.

        Returns:
            A tuple of (output directory, suffix).

        Raises:
            ValueError: If ``to`` does not have one of the accepted forms.
            PathExistsError: If the directory path exists but is not a directory.
        """
        out_dir = Path(default)
        suffix = DEF_SUFFIX
        if to is not None:
            # The "*.<suffix>" part is optional so that a plain directory is accepted.
            pat = regex.compile(r"([^\*]*)(?:\*(\..+))?", flags=regex.V1)
            m = pat.fullmatch(str(to))
            if m is None:
                raise ValueError(
                    f"Cannot parse '{to}' as '<dir>', '*.<suffix>', or '<dir>*.<suffix>'"
                )
            # An unmatched optional group is None and an empty directory part is "";
            # both mean "use the fallback".
            raw_dir = m.group(1) or str(default)
            if m.group(2):
                suffix = m.group(2)
            if raw_dir.startswith("."):
                logger.warning(f"Writing to {raw_dir} - was it meant as a suffix instead?")
            out_dir = Path(raw_dir)
        if out_dir.exists() and not out_dir.is_dir():
            raise PathExistsError(f"Path {out_dir} already exists but is not a directory")
        FileFormat.from_suffix(suffix)  # make sure it's ok
        # any() stops at the first entry instead of listing the whole directory
        if out_dir.exists() and any(out_dir.iterdir()):
            logger.warning(f"Directory {out_dir} is non-empty")
        return out_dir, suffix

    @staticmethod
    def split(st: str) -> Set[str]:
        """Split ``st`` on commas and return the set of whitespace-stripped items."""
        return {s.strip() for s in st.split(",")}

    @staticmethod
    def get_taxa(taxa: Optional[str]) -> Sequence["Taxonomy"]:
        """
        Load one taxonomy per comma-separated entry of ``taxa``.

        Args:
            taxa: Comma-separated taxon identifiers, or ``None``.

        Returns:
            An empty list if ``taxa`` is ``None``; otherwise the result of
            ``factory.load`` for each stripped entry, in order, where the factory
            comes from ``TaxonomyFactories.from_uniprot`` on the configured cache path.
        """
        if taxa is None:
            return []
        factory = TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path)
        return [factory.load(taxon.strip()) for taxon in taxa.split(",")]

    @staticmethod
    def get_trial_statuses(st: str) -> Set[str]:
        """Return ``ClinicalTrialsGovUtils.resolve_statuses(st)``."""
        return ClinicalTrialsGovUtils.resolve_statuses(st)

    @staticmethod
    def get_target_types(st: str) -> Set[str]:
        """Return the names of the members that ``TargetType.resolve(st)`` yields."""
        return {s.name for s in TargetType.resolve(st)}

    @staticmethod
    def get_flags(st: str) -> Set[str]:
        """Return the names of the members that ``DataValidityComment.resolve(st)`` yields."""
        return {s.name for s in DataValidityComment.resolve(st)}
83
|
|
|
|
84
|
|
|
|
85
|
|
|
# Names exported by a star-import of this module.
__all__ = ["EntryUtils"] |
86
|
|
|
|