mandos.cli.Commands.dl_tax() - Code Metrics - Inspection of "feat: tests and taxa by name" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( 4e4203...cdf0f7 )

by Douglas

created 2021-04-01 01:10 UTC

mandos.cli.Commands.dl_tax() A

↳ Parent: mandos.cli

Complexity

Conditions

Size

Total Lines	16
Code Lines	4

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	4
nop	1
dl	0
loc	16
rs	10
c	0
b	0
f	0

"""
Command-line interface for mandos.
"""

from __future__ import annotations

import logging
from pathlib import Path

import typer


from mandos import logger
from mandos.model.settings import MANDOS_SETTINGS
from mandos.model.taxonomy import TaxonomyDf
from mandos.model.taxonomy_caches import TaxonomyFactories
from mandos.entries.entries import Entries, _Typer
from mandos.entries.api_singletons import Apis
from mandos.entries.multi_searches import MultiSearch
from mandos.entries.searcher import SearcherUtils

# IMPORTANT: this call runs at import time, before any command below is defined or registered.
# NOTE(review): presumably installs the default API singletons that the entry points rely on --
# confirm against ``mandos.entries.api_singletons.Apis``.
Apis.set_default()
# The Typer application object; commands are appended to ``cli.registered_commands`` further down.
cli = typer.Typer()
# _old_wrap_text = copy(click.formatting.wrap_text)
# def _new_wrap_text(
#    text, width=100, initial_indent="", subsequent_indent="", preserve_paragraphs=False
# ):
#    return _old_wrap_text(text, 100, initial_indent, subsequent_indent, preserve_paragraphs)
# click.formatting.wrap_text = _new_wrap_text


class Commands:
    """
    Entry points for mandos.

    Every method is a static callback whose signature Typer inspects to build the
    command-line parameters, and whose docstring Typer shows as the command help.
    """

    @staticmethod
    def search(
        path: Path = _Typer.path,
        config: Path = typer.Argument(
            None,
            help=".toml config file. See docs.",
            exists=True,
            dir_okay=False,
            readable=True,
        ),
    ) -> None:
        """
        Run multiple searches.
        """
        # All of the work is delegated to MultiSearch; this method only wires up the CLI params.
        MultiSearch(path, config).search()

    @staticmethod
    def find(
        path: Path = _Typer.path,
        pubchem: bool = typer.Option(True, help="Download data from PubChem"),
        chembl: bool = typer.Option(True, help="Download data from ChEMBL"),
        hmdb: bool = typer.Option(True, help="Download data from HMDB"),
    ) -> None:
        """
        Fetches and caches compound data.
        Useful to check what you can see before running a search.
        """
        # The output sits next to the input, with the suffix swapped for ".ids.csv".
        out_path = path.with_suffix(".ids.csv")
        # Refuse to overwrite an earlier result; raises FileExistsError before any download.
        if out_path.exists():
            raise FileExistsError(out_path)
        inchikeys = SearcherUtils.read(path)
        df = SearcherUtils.dl(inchikeys, pubchem=pubchem, chembl=chembl, hmdb=hmdb)
        df.to_csv(out_path)
        typer.echo(f"Wrote to {out_path}")

    @staticmethod
    def build_taxonomy(
        taxa: str = typer.Argument(
            None,
            help="""
            UniProt taxon ID or scientific name, comma-separated.
            Scientific names are only permitted for subsets of vertebrata.
        """,
        ),
        # NOTE(review): the help below advertises a ".feather" default, but the code falls back
        # to ".tab.gz" -- confirm which one is intended.
        to: Path = typer.Option(
            None,
            show_default=False,
            help="""
        Output file; can be CSV, TSV, feather, etc.
        If it starts with '.', uses the default path but changes the format and filename extension.

        [default: <taxon-id,<taxon-id>,...>.feather]
        """,
        ),
    ) -> None:
        """
        Writes a file of the descendants of the given taxa.
        """
        # Strip *before* testing isdigit() so that "9606, 10090" yields two integer IDs
        # rather than an int and the string " 10090".
        names = [token.strip() for token in taxa.split(",")]
        taxon_ids = [int(name) if name.isdigit() else name for name in names]
        # By default the filename is just the requested taxa joined by commas.
        # Join the parsed taxa, not the characters of the raw ``taxa`` string.
        stem = ",".join(str(taxon) for taxon in taxon_ids)
        default_path = Path(stem + ".tab.gz")
        if to is None:
            to = default_path
        elif str(to).startswith("."):
            # A bare extension such as ".feather": keep the default stem and replace the whole
            # ".tab.gz" ending. ``with_suffix`` would only swap the final ".gz".
            # ``with_name`` raises ValueError for values like "../x.csv" that are not a plain name.
            to = default_path.with_name(stem + str(to))
        to.parent.mkdir(exist_ok=True, parents=True)
        # TODO: this is quite inefficient
        # we're potentially reading in the vertebrata file multiple times
        # we could instead read it in, then concatenate the matching subtrees
        # however, this is moderately efficient if you ask for, e.g., Mammalia and Plantae
        # then it'll download Plantae but just get Mammalia from the resource-file Vertebrata
        taxes = []
        for taxon in taxon_ids:
            tax = TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
            taxes.append(tax.to_df())
        # Overlapping subtrees produce repeated rows, hence the de-duplication before writing.
        final_tax = TaxonomyDf.concat(taxes, ignore_index=True)
        final_tax = final_tax.drop_duplicates().sort_values("taxon")
        final_tax.write_file(to)

    @staticmethod
    def dl_tax(
        taxon: int,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir
        or mandos cache (``~/.mandos``).

        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
        """
        # The loaded taxonomy is discarded; the call is made only for the factory's
        # download-and-cache behavior described above.
        TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)


# Typer infers each command's parameters from its callback signature, which makes commands with
# dynamically configured params very hard to declare well. Instead of decorating functions,
# the ``CommandInfo`` records are built by hand and appended to the app directly.

# Imported before the loop so that ``CommandInfo`` is bound even when ``Entries`` is empty;
# the ``extend`` call below uses it unconditionally.
from typer.models import CommandInfo

for entry in Entries:
    # Register the entry as a CLI command and expose the same callable as ``Commands.<cmd>``.
    info = CommandInfo(entry.cmd(), callback=entry.run)
    cli.registered_commands.append(info)
    setattr(Commands, entry.cmd(), entry.run)

# Hand-written commands; ``@tax-dl`` is registered but hidden from the help listing.
cli.registered_commands.extend(
    [
        CommandInfo("@search", callback=Commands.search),
        CommandInfo("@tax-tree", callback=Commands.build_taxonomy),
        CommandInfo("@tax-dl", callback=Commands.dl_tax, hidden=True),
    ]
)


if __name__ == "__main__":
    import sys

    # A single stdout handler is shared by the root logger and the mandos logger.
    # NOTE(review): if ``logger`` propagates to the root logger, its records would pass through
    # this handler twice -- confirm how ``mandos.logger`` is configured.
    formatter = logging.Formatter("%(levelname)-7s %(asctime)s %(message)s", "%Y%m%d:%H:%M:%S")
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    # NOTSET (0): the handler filters nothing itself; the logger levels below decide.
    handler.setLevel(logging.NOTSET)

    root = logging.getLogger()
    # Starting verbosity, which can be changed later: WARNING on the root logger, INFO on mandos.
    root.setLevel(logging.WARNING)
    logger.setLevel(logging.INFO)
    root.addHandler(handler)
    logger.addHandler(handler)

    # Hand control to Typer, which parses ``sys.argv`` and dispatches to a registered command.
    cli()


# Star-imports of this module expose only ``Commands``; ``cli`` must be imported by name.
__all__ = ["Commands"]


1			"""
2			Command-line interface for mandos.
3			"""
4
5			from __future__ import annotations
6
7			import logging
8			from pathlib import Path
9
10			import typer
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'typer' Loading history...
11
12			from mandos import logger
13			from mandos.model.settings import MANDOS_SETTINGS
14			from mandos.model.taxonomy import TaxonomyDf
15			from mandos.model.taxonomy_caches import TaxonomyFactories
16			from mandos.entries.entries import Entries, _Typer
17			from mandos.entries.api_singletons import Apis
18			from mandos.entries.multi_searches import MultiSearch
19			from mandos.entries.searcher import SearcherUtils
20
21			# IMPORTANT!
22			Apis.set_default()
23			cli = typer.Typer()
24			# _old_wrap_text = copy(click.formatting.wrap_text)
25			# def _new_wrap_text(
26			# text, width=100, initial_indent="", subsequent_indent="", preserve_paragraphs=False
27			# ):
28			# return _old_wrap_text(text, 100, initial_indent, subsequent_indent, preserve_paragraphs)
29			# click.formatting.wrap_text = _new_wrap_text
30
31
32			class Commands:
33			"""
34			Entry points for mandos.
35			"""
36
37			@staticmethod
38			def search(
39			path: Path = _Typer.path,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
40			config: Path = typer.Argument(
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
41			None,
42			help=".toml config file. See docs.",
43			exists=True,
44			dir_okay=False,
45			readable=True,
46			),
47			) -> None:
48			"""
49			Run multiple searches.
50			"""
51			MultiSearch(path, config).search()
52
53			@staticmethod
54			def find(
55			path: Path = _Typer.path,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
56			pubchem: bool = typer.Option(True, help="Download data from PubChem"),
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
57			chembl: bool = typer.Option(True, help="Download data from ChEMBL"),
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
58			hmdb: bool = typer.Option(True, help="Download data from HMDB"),
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
59			) -> None:
60			"""
61			Fetches and caches compound data.
62			Useful to check what you can see before running a search.
63			"""
64			out_path = path.with_suffix(".ids.csv")
65			if out_path.exists():
66			raise FileExistsError(out_path)
67			inchikeys = SearcherUtils.read(path)
68			df = SearcherUtils.dl(inchikeys, pubchem=pubchem, chembl=chembl, hmdb=hmdb)
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
69			df.to_csv(out_path)
70			typer.echo(f"Wrote to {out_path}")
71
72			@staticmethod
73			def build_taxonomy(
			0 ignored issues – show Coding Style Naming introduced 2021-04-01 01:11 UTC by Report Bug Copy Issue Report Argument name "to" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
74			taxa: str = typer.Argument(
			0 ignored issues – show Coding Style introduced 2021-03-24 04:52 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
75			None,
76			help="""
77			UniProt taxon ID or scientific name, comma-separated.
78			Scientific names are only permitted for subsets of vertebrata.
79			""",
80			),
81			to: Path = typer.Option(
			0 ignored issues – show Coding Style introduced 2021-04-01 01:11 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
82			None,
83			show_default=False,
84			help="""
85			Output file; can be CSV, TSV, feather, etc.
86			If it starts with '.', uses the default path but changes the format and filename extension.
87
88			[default: <taxon-id,<taxon-id>,...>.feather]
89			""",
90			),
91			):
92			"""
93			Writes a CSV file of the descendents of given taxa.
94			"""
95			taxon_ids = [
96			int(taxon.strip()) if taxon.isdigit() else taxon.strip() for taxon in taxa.split(",")
97			]
98			# get the filename
99			# by default we'll just use the inputs
100			default_path = Path(",".join([str(t).strip() for t in taxa]) + ".tab.gz")
101			if to is None:
102			to = default_path
103			elif str(to).startswith("."):
104			to = default_path.with_suffix(str(to))
105			to.parent.mkdir(exist_ok=True, parents=True)
106			# TODO: this is quite inefficient
			0 ignored issues – show Coding Style introduced 2021-04-01 01:11 UTC by Report Bug Copy Issue Report `TODO` and `FIXME` comments should generally be avoided. Loading history...
107			# we're potentially reading in the vertebrata file multiple times
108			# we could instead read it in, then concatenate the matching subtrees
109			# however, this is moderately efficient if you ask for, e.g., Mammalia and Plantae
110			# then it'll download Plantae but just get Mammalia from the resource-file Vertebrata
111			taxes = []
112			for taxon in taxon_ids:
113			tax = TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
114			taxes.append(tax.to_df())
115			final_tax = TaxonomyDf.concat(taxes, ignore_index=True)
116			final_tax = final_tax.drop_duplicates().sort_values("taxon")
117			final_tax.write_file(to)
118
119			@staticmethod
120			def dl_tax(
121			taxon: int,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122			) -> None:
123			"""
124			Preps a new taxonomy file for use in mandos.
125			Just returns if a corresponding file already exists in the resources dir or mandos cache (``~/.mandos``).
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (113/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
126			Otherwise, downloads a tab-separated file from UniProt.
127			(To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
128			Then applies fixes and reduces the file size, creating a new file alongside.
129			Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.
130
131			Args:
132			taxon: The ID of the UniProt taxon
133			"""
134			TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
135
136
137			# Oh dear this is a nightmare
138			# it's really hard to create typer commands with dynamically configured params --
139			# we really need to rely on its inferring of params
140			# that makes this really hard to do well
141			for entry in Entries:
142			from typer.models import CommandInfo
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Unable to import 'typer.models' Loading history...
143
144			info = CommandInfo(entry.cmd(), callback=entry.run)
145			cli.registered_commands.append(info)
146			# print(f"Registered {entry.cmd()} to {entry}")
147			setattr(Commands, entry.cmd(), entry.run)
148
149			cli.registered_commands.extend(
150			[
151			CommandInfo("@search", callback=Commands.search),
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report The variable `CommandInfo` does not seem to be defined in case the `for` loop on line `141` is not entered. Are you sure this can never be the case? Loading history...
152			CommandInfo("@tax-tree", callback=Commands.build_taxonomy),
153			CommandInfo("@tax-dl", callback=Commands.dl_tax, hidden=True),
154			]
155			)
156
157
158			if __name__ == "__main__":
159			# logging.basicConfig(level=0)
160			import sys
161
162			root = logging.getLogger()
163			handler = logging.StreamHandler(sys.stdout)
164			handler.setLevel(0)
165			formatter = logging.Formatter("%(levelname)-7s %(asctime)s %(message)s", "%Y%m%d:%H:%M:%S")
166			handler.setFormatter(formatter)
167			root.addHandler(handler)
168			logger.addHandler(handler)
169			# log_factory = PrettyRecordFactory(10, 12, 5, width=100, symbols=True).modifying(logger)
170			# good start; can be changed
171			root.setLevel(logging.WARNING)
172			logger.setLevel(logging.INFO)
173			cli()
174
175
176			__all__ = ["Commands"]
177

dmyersturnbull / mandos

Push — main ( 4e4203...cdf0f7 )

mandos.cli.Commands.dl_tax() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like