mandos.entries.filler.CompoundIdFiller._get_pubchem() - Code Metrics - Inspection of "feat: add prediction search; improve taxa" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( 9813db...5006f2 )

by Douglas

created 2021-08-07 00:13 UTC

CompoundIdFiller._get_pubchem() B

↳ Parent: mandos.entries.filler

Complexity

Conditions

Size

Total Lines	13
Code Lines	12

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	6
eloc	12
nop	3
dl	0
loc	13
rs	8.6666
c	0
b	0
f	0

from __future__ import annotations

from dataclasses import dataclass
from typing import Set, Optional, Mapping, Tuple, Dict


from mandos import logger
from pocketutils.tools.common_tools import CommonTools

from typeddfs import TypedDfs


from mandos.model import CompoundNotFoundError, CompoundStruct


from mandos.entries.api_singletons import Apis
from mandos.model.apis.chembl_support.chembl_utils import ChemblUtils
from mandos.model.apis.pubchem_support.pubchem_data import PubchemData


# Typed DataFrame class for the compound-ID table that CompoundIdFiller reads and returns.
# Every column named below is declared with dtype=str.
# NOTE(review): in typeddfs, ``reserve`` presumably declares optional (not required)
# columns, and ``strict(index=True, cols=False)`` presumably forbids extra index levels
# while still allowing extra columns -- confirm against the typeddfs documentation.
IdMatchFrame = (
    TypedDfs.typed("IdMatchFrame")
    # Identifiers describing the input compound
    .reserve("inchikey", dtype=str)
    .reserve("compound_id", "compound_name", "library", dtype=str)
    .reserve("inchi", dtype=str)
    # Per-database identifiers and structures
    .reserve("chembl_id", "pubchem_id", "hmdb_id", dtype=str)
    .reserve("chembl_inchikey", "pubchem_inchikey", dtype=str)
    .reserve("chembl_inchi", "pubchem_inchi", dtype=str)
    # Copies of user-supplied values (CompoundIdFiller._prep renames inputs to "origin_*")
    .reserve("origin_inchi", "origin_inchikey", dtype=str)
    .strict(index=True, cols=False)
).build()


# Columns that CompoundIdFiller.fill() writes from the lookup results.
# CompoundIdFiller._prep() renames any input column that already has one of these
# names to "origin_<name>" before filling.
FILL_IDS = [
    "inchi",
    "inchikey",
    "chembl_id",
    "pubchem_id",
    "chembl_inchi",
    "chembl_inchikey",
    "pubchem_inchi",
    "pubchem_inchikey",
]
# Columns that CompoundIdFiller.fill() moves to the front of the output, in this order
# (only those actually present in the frame are used).
PUT_FIRST = [
    "compound_id",
    "compound_name",
    "library",
    "inchikey",
    "chembl_id",
    "pubchem_id",
    "g2p_id",
    "chembl_inchikey",
    "pubchem_inchikey",
    "origin_inchikey",
]
# Columns that CompoundIdFiller.fill() moves to the end of the output, in this order.
PUT_LAST = ["inchi", "chembl_inchi", "pubchem_inchi", "origin_inchi", "smiles"]

# Alias used in type annotations for a database name (see CompoundIdFiller._choose).
Db = str


def look(obj, attrs):
    """
    Looks up ``attrs`` on ``obj`` via ``CommonTools.look`` and normalizes missing values.

    Arguments:
        obj: The object to read from; passed straight to ``CommonTools.look``
        attrs: The attribute specification; passed straight to ``CommonTools.look``

    Returns:
        ``None`` if the value found is the string "N/A" (in any letter case) or if
        ``CommonTools.is_probable_null`` reports it as null; otherwise the value itself
    """
    found = CommonTools.look(obj, attrs)
    # The "N/A" test comes first so that is_probable_null is skipped for that string
    is_na_text = isinstance(found, str) and found.upper() == "N/A"
    if is_na_text or CommonTools.is_probable_null(found):
        return None
    return found


@dataclass(frozen=True, repr=True)
class CompoundIdFiller:
    """
    Fills in ChEMBL and PubChem IDs, InChI strings, and InChIKeys for an ``IdMatchFrame``.

    The entry point is :meth:`fill`; the remaining methods are private helpers.
    """

    # NOTE(review): neither flag is read by any method of this class as written;
    # presumably they were meant to switch the ChEMBL / PubChem lookups on and off -- confirm.
    chembl: bool = True
    pubchem: bool = True

    def fill(self, df: IdMatchFrame) -> IdMatchFrame:
        """
        Looks up every row of ``df`` and adds the columns listed in ``FILL_IDS``.

        Input columns whose names collide with ``FILL_IDS`` are first renamed to
        ``origin_<name>`` (see :meth:`_prep`). After filling, any ``origin_*`` column
        whose values equal the filled column is dropped, and the columns are reordered
        according to ``PUT_FIRST`` and ``PUT_LAST``.

        Arguments:
            df: The input compounds

        Returns:
            The filled and reordered frame

        Raises:
            ValueError: If ``df`` already has a column starting with ``origin_``
        """
        df = self._prep(df)
        logger.info(f"Processing {len(df)} input compounds...")
        fill = []
        for i, row in enumerate(df.itertuples()):
            # Progress report: every 200 rows at NOTICE, every other 20th row at INFO
            if i % 200 == 0 and i > 0:
                logger.notice(f"Processed {i:,} / {len(df):,}")
            elif i % 20 == 0 and i > 0:
                logger.info(f"Processed {i:,} / {len(df):,}")
            proc = self._process(
                compound_id=look(row, "compound_id"),
                library=look(row, "library"),
                inchi=look(row, "origin_inchi"),
                inchikey=look(row, "origin_inchikey"),
                pubchem_id=look(row, "origin_pubchem_id"),
                chembl_id=look(row, "origin_chembl_id"),
                line_no=i,
            )
            fill.append(proc)
        for c in FILL_IDS:
            df[c] = [r[c] for r in fill]
        # An origin_* column that carries exactly the filled values adds nothing
        duplicate_cols = []
        for c in FILL_IDS:
            origin = "origin_" + c
            if c in df.columns and origin in df.columns:
                if df[c].values.tolist() == df[origin].values.tolist():
                    duplicate_cols.append(origin)
        logger.notice(f"Done. Filled {len(df):,} rows.")
        if duplicate_cols:
            df = df.drop_cols(duplicate_cols)
            logger.notice(f"Dropped duplicated columns {', '.join(duplicate_cols)}")
        order = [o for o in PUT_FIRST if o in df.columns]
        order += [c for c in df.columns if c not in PUT_FIRST and c not in PUT_LAST]
        order += [o for o in PUT_LAST if o in df.columns]
        df = df.cfirst(order)
        # Count the rows with an ID directly instead of materializing the filtered frame
        have_chembl = len(df) - int(df["chembl_id"].isnull().sum())
        have_pubchem = len(df) - int(df["pubchem_id"].isnull().sum())
        logger.notice(f"{have_chembl:,}/{len(df):,} have ChEMBL IDs")
        logger.notice(f"{have_pubchem:,}/{len(df):,} have PubChem IDs")
        return df

    def _process(
        self,
        compound_id: Optional[str],
        library: Optional[str],
        inchi: Optional[str],
        inchikey: Optional[str],
        pubchem_id: Optional[str],
        chembl_id: Optional[str],
        line_no: int,
    ):
        """
        Resolves a single input row against ChEMBL and PubChem.

        Arguments:
            compound_id: The user's ID for the compound; used for the "input" struct and logging
            library: Accepted for interface compatibility; not used by this method
            inchi: The InChI supplied in the input, if any
            inchikey: The InChIKey supplied in the input, if any
            pubchem_id: The PubChem ID supplied in the input, if any
            chembl_id: The ChEMBL ID supplied in the input, if any
            line_no: The row number, used only in log messages

        Returns:
            A dict with one entry per name in ``FILL_IDS``. If no InChIKey, PubChem ID,
            or ChEMBL ID was supplied, the database entries are all ``None`` and
            ``inchi`` / ``inchikey`` are passed through unchanged.
        """
        if inchikey is None and pubchem_id is None and chembl_id is None:
            # Nothing to search with
            logger.error(f"[line {line_no}] No data for {compound_id}")
            return dict(
                inchi=inchi,
                inchikey=inchikey,
                chembl_id=None,
                chembl_inchi=None,
                chembl_inchikey=None,
                pubchem_id=None,
                pubchem_inchi=None,
                pubchem_inchikey=None,
            )
        fake_x = CompoundStruct("input", compound_id, inchi, inchikey)
        chembl_x = self._get_chembl(inchikey, chembl_id)
        pubchem_x = self._get_pubchem(inchikey, pubchem_id)
        #################################################################################
        # This is important and weird!
        # Where DNE = does not exist and E = exists
        # If chembl DNE and pubchem E ==> fill chembl
        # THEN: If chembl E and (pubchem E or pubchem DNE) ==> fill pubchem
        # we might therefore go from pubchem --> chembl --> pubchem
        # The advantage is that chembl might have a good parent compound
        # Whereas pubchem does not
        # This is often true: chembl is much better at this than pubchem
        # In contrast, only fill ChEMBL if it's missing
        if chembl_x is None and pubchem_x is not None:
            chembl_x = self._get_chembl(pubchem_x.inchikey, None)
        if chembl_x is not None:
            pubchem_x = self._get_pubchem(chembl_x.inchikey, None)
        #################################################################################
        # the order is from best to worst
        prioritize_choices = [chembl_x, pubchem_x, fake_x]
        db_to_struct = {o.db: o for o in prioritize_choices if o is not None}
        inchikey, inchikey_choices = self._choose(db_to_struct, "inchikey")
        inchi, inchi_choices = self._choose(db_to_struct, "inchi")
        about = " ; ".join([x.simple_str for x in prioritize_choices if x is not None])
        # *_choices maps each distinct database-reported value to a database name,
        # so more than one entry means the databases disagree
        if len(inchikey_choices) == 0:
            logger.error(f"[line {line_no}] no database inchikeys found :: {about}")
        elif len(inchikey_choices) > 1:
            logger.error(f"[line {line_no}] inchikey mismatch :: {about} :: {inchikey_choices}")
        elif len(inchi_choices) > 1:
            logger.debug(f"[line {line_no}] inchi mismatch :: {about} :: {inchi_choices}")
        return dict(
            inchi=inchi,
            inchikey=inchikey,
            chembl_id=look(chembl_x, "id"),
            chembl_inchi=look(chembl_x, "inchi"),
            chembl_inchikey=look(chembl_x, "inchikey"),
            pubchem_id=look(pubchem_x, "id"),
            pubchem_inchi=look(pubchem_x, "inchi"),
            pubchem_inchikey=look(pubchem_x, "inchikey"),
        )

    def _choose(
        self,
        db_to_struct: Mapping[str, CompoundStruct],
        what: str,
    ) -> Tuple[Optional[str], Dict[str, Db]]:
        """
        Chooses the best what="inchi" or what="inchikey".

        Arguments:
            db_to_struct: Should be in order from most preferred to least
            what: The name of the CompoundStruct attribute to access

        Returns:
            A tuple of (1) the value from the most preferred struct that has one, or
            ``None`` if no struct has one; and (2) a dict mapping each value reported
            by a database other than "input" to the name of a database reporting it
        """
        # Dicts keep insertion order, so ``options`` stays in preference order
        options = {}
        for struct in db_to_struct.values():
            value = look(struct, what)
            if value is not None:
                options[struct.db] = value
        if not options:
            return None, {}
        non_input_dbs = {v: k for k, v in options.items() if k != "input"}
        # Take the first (most preferred) value; picking from a set would be arbitrary
        best = next(iter(options.values()))
        return best, non_input_dbs

    def _prep(self, df: IdMatchFrame) -> IdMatchFrame:
        """
        Prepares the input frame for filling.

        Renames every column whose name is in ``FILL_IDS`` to ``origin_<name>``, then
        drops every column that is entirely null.

        Arguments:
            df: The input compounds

        Returns:
            The renamed and pruned frame

        Raises:
            ValueError: If a column of ``df`` already starts with ``origin_``
        """
        bad_cols = [c for c in df.columns if c.startswith("origin_")]
        if bad_cols:
            raise ValueError(f"Columns {', '.join(bad_cols)} start with 'origin_'")
        rename_cols = {c: "origin_" + c for c in FILL_IDS if c in df.columns}
        if rename_cols:
            logger.notice(f"Renaming columns: {', '.join(rename_cols.keys())}")
        df = df.rename(columns=rename_cols)
        drop_cols = {c for c in df.columns if df[c].isnull().all()}
        if drop_cols:
            logger.warning(f"Dropping empty columns: {', '.join(drop_cols)}")
        df = df.drop_cols(drop_cols)
        return df

    def _get_pubchem(self, inchikey: Optional[str], cid: Optional[int]) -> Optional[CompoundStruct]:
        """
        Fetches the PubChem structure for a compound.

        If ``cid`` is given, it is converted with ``int`` and used to look up the
        InChIKey, which replaces ``inchikey``. The data are then fetched by InChIKey.

        Arguments:
            inchikey: The InChIKey to search with when ``cid`` is ``None``
            cid: The PubChem compound ID, if known

        Returns:
            The ``struct_view`` of the fetched data, or ``None`` if there is no InChIKey
            to search with, the InChIKey lookup raises ``CompoundNotFoundError``, or no
            data come back

        Raises:
            CompoundNotFoundError: Presumably when ``cid`` is given but not found;
                                   the lookup by ID is deliberately not guarded
        """
        api = Apis.Pubchem
        if cid is not None:
            # let it raise a CompoundNotFoundError
            inchikey = api.fetch_data(int(cid)).names_and_identifiers.inchikey
        if inchikey is None:
            return None
        try:
            data: Optional[PubchemData] = api.fetch_data(inchikey)
        except CompoundNotFoundError:
            return None
        return None if data is None else data.struct_view

    def _get_chembl(self, inchikey: Optional[str], cid: Optional[str]) -> Optional[CompoundStruct]:
        """
        Fetches the ChEMBL structure for a compound.

        Arguments:
            inchikey: The InChIKey to search with when ``cid`` is ``None``
            cid: The ChEMBL ID, if known; takes precedence over ``inchikey``

        Returns:
            The ``struct_view`` of the compound, or ``None`` if the lookup by InChIKey
            raises ``CompoundNotFoundError``

        Raises:
            CompoundNotFoundError: Presumably when ``cid`` is given but not found;
                                   the lookup by ID is deliberately not guarded
        """
        util = ChemblUtils(Apis.Chembl)
        if cid is not None:
            # let it raise a CompoundNotFoundError
            return util.get_compound(cid).struct_view
        try:
            return util.get_compound(inchikey).struct_view
        except CompoundNotFoundError:
            return None


__all__ = ["CompoundIdFiller", "IdMatchFrame"]


1			from __future__ import annotations
			0 ignored issues – show introduced 2021-08-03 04:51 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2			from dataclasses import dataclass
3			from typing import Set, Optional, Mapping, Tuple, Dict
			0 ignored issues – show Unused Code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unused Set imported from typing Loading history...
4
5			from mandos import logger
6			from pocketutils.tools.common_tools import CommonTools
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.tools.common_tools' Loading history...
7			from typeddfs import TypedDfs
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs' Loading history...
8
9			from mandos.model import CompoundNotFoundError, CompoundStruct
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Imports from package mandos are not grouped Loading history...
10
11			from mandos.entries.api_singletons import Apis
12			from mandos.model.apis.chembl_support.chembl_utils import ChemblUtils
13			from mandos.model.apis.pubchem_support.pubchem_data import PubchemData
14
15
16			IdMatchFrame = (
17			TypedDfs.typed("IdMatchFrame")
18			.reserve("inchikey", dtype=str)
19			.reserve("compound_id", "compound_name", "library", dtype=str)
20			.reserve("inchi", dtype=str)
21			.reserve("chembl_id", "pubchem_id", "hmdb_id", dtype=str)
22			.reserve("chembl_inchikey", "pubchem_inchikey", dtype=str)
23			.reserve("chembl_inchi", "pubchem_inchi", dtype=str)
24			.reserve("origin_inchi", "origin_inchikey", dtype=str)
25			.strict(index=True, cols=False)
26			).build()
27
28
29			FILL_IDS = [
30			"inchi",
31			"inchikey",
32			"chembl_id",
33			"pubchem_id",
34			"chembl_inchi",
35			"chembl_inchikey",
36			"pubchem_inchi",
37			"pubchem_inchikey",
38			]
39			PUT_FIRST = [
40			"compound_id",
41			"compound_name",
42			"library",
43			"inchikey",
44			"chembl_id",
45			"pubchem_id",
46			"g2p_id",
47			"chembl_inchikey",
48			"pubchem_inchikey",
49			"origin_inchikey",
50			]
51			PUT_LAST = ["inchi", "chembl_inchi", "pubchem_inchi", "origin_inchi", "smiles"]
52
53			Db = str
54
55
56			def look(obj, attrs):
			0 ignored issues – show introduced 2021-08-03 04:51 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
57			s = CommonTools.look(obj, attrs)
			0 ignored issues – show Coding Style Naming introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Variable name "s" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
58			if isinstance(s, str) and s.upper() == "N/A":
59			return None
60			return None if CommonTools.is_probable_null(s) else s
61
62
63			@dataclass(frozen=True, repr=True)
			0 ignored issues – show introduced 2021-08-03 04:51 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
64			class CompoundIdFiller:
65			chembl: bool = True
66			pubchem: bool = True
67
68			def fill(self, df: IdMatchFrame) -> IdMatchFrame:
			0 ignored issues – show Coding Style Naming introduced 2021-08-03 04:51 UTC by Report Bug Copy Issue Report Argument name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... introduced 2021-08-03 04:51 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
69			df = self._prep(df)
70			logger.info(f"Processing {len(df)} input compounds...")
71			fill = []
72			for i, row in enumerate(df.itertuples()):
73			if i % 200 == 0 and i > 0:
74			logger.notice(f"Processed {i:,} / {len(df):,}")
75			elif i % 20 == 0 and i > 0:
76			logger.info(f"Processed {i:,} / {len(df):,}")
77			proc = self._process(
78			compound_id=look(row, "compound_id"),
79			library=look(row, "library"),
80			inchi=look(row, "origin_inchi"),
81			inchikey=look(row, "origin_inchikey"),
82			pubchem_id=look(row, "origin_pubchem_id"),
83			chembl_id=look(row, "origin_chembl_id"),
84			line_no=i,
85			)
86			fill.append(proc)
87			for c in FILL_IDS:
			0 ignored issues – show Coding Style Naming introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Variable name "c" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
88			df[c] = [r[c] for r in fill]
89			duplicate_cols = []
90			for c in FILL_IDS:
			0 ignored issues – show Coding Style Naming introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Variable name "c" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
91			if c in df.columns and "origin_" + c in df.columns:
92			if df[c].values.tolist() == df["origin_" + c].values.tolist():
93			duplicate_cols.append("origin_" + c)
94			logger.notice(f"Done. Filled {len(df):,} rows.")
95			if len(duplicate_cols) > 0:
96			df = df.drop_cols(duplicate_cols)
97			logger.notice(f"Dropped duplicated columns {', '.join(duplicate_cols)}")
98			order = [o for o in PUT_FIRST if o in df.columns]
99			order += [c for c in df.columns if c not in PUT_FIRST and c not in PUT_LAST]
100			order += [o for o in PUT_LAST if o in df.columns]
101			df = df.cfirst(order)
102			have_chembl = len(df) - len(df[df["chembl_id"].isnull()]["chembl_id"].tolist())
103			have_pubchem = len(df) - len(df[df["pubchem_id"].isnull()]["pubchem_id"].tolist())
104			logger.notice(f"{have_chembl:,}/{len(df):,} have ChEMBL IDs")
105			logger.notice(f"{have_pubchem:,}/{len(df):,} have PubChem IDs")
106			return df
107
108			def _process(
			0 ignored issues – show best-practice introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Too many arguments (8/5) Loading history... Comprehensibility introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report This function exceeds the maximum number of variables (16/15). Loading history...
109			self,
			0 ignored issues – show Coding Style introduced 2021-08-03 04:51 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
110			compound_id: Optional[str],
			0 ignored issues – show Coding Style introduced 2021-08-02 23:39 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
111			library: Optional[str],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history... Unused Code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report The argument `library` seems to be unused. Loading history...
112			inchi: Optional[str],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
113			inchikey: Optional[str],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
114			pubchem_id: Optional[str],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
115			chembl_id: Optional[str],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
116			line_no: int,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
117			):
118			if inchikey is pubchem_id is chembl_id is None:
119			logger.error(f"[line {line_no}] No data for {compound_id}")
120			return dict(
121			inchi=inchi,
122			inchikey=inchikey,
123			chembl_id=None,
124			chembl_inchi=None,
125			chembl_inchikey=None,
126			pubchem_id=None,
127			pubchem_inchi=None,
128			pubchem_inchikey=None,
129			)
130			fake_x = CompoundStruct("input", compound_id, inchi, inchikey)
131			chembl_x = self._get_chembl(inchikey, chembl_id)
132			pubchem_x = self._get_pubchem(inchikey, pubchem_id)
133			#################################################################################
134			# This is important and weird!
135			# Where DNE = does not exist and E = exists
136			# If chembl DNE and pubchem E ==> fill chembl
137			# THEN: If chembl E and (pubchem E or pubchem DNE) ==> fill pubchem
138			# we might therefore go from pubchem --> chembl --> pubchem
139			# The advantage is that chembl might have a good parent compound
140			# Whereas pubchem does not
141			# This is often true: chembl is much better at this than pubchem
142			# In contrast, only fill ChEMBL if it's missing
143			if chembl_x is None and pubchem_x is not None:
144			chembl_x = self._get_chembl(pubchem_x.inchikey, None)
145			if chembl_x is not None:
146			pubchem_x = self._get_pubchem(chembl_x.inchikey, None)
147			#################################################################################
148			# the order is from best to worst
149			prioritize_choices = [chembl_x, pubchem_x, fake_x]
150			db_to_struct = {o.db: o for o in prioritize_choices if o is not None}
151			inchikey, inchikey_choices = self._choose(db_to_struct, "inchikey")
152			inchi, inchi_choices = self._choose(db_to_struct, "inchi")
153			about = " ; ".join([x.simple_str for x in prioritize_choices if x is not None])
154			if len(inchikey_choices) == 0:
155			logger.error(f"[line {line_no}] no database inchikeys found :: {about}")
156			elif len(inchikey_choices) > 1:
157			logger.error(f"[line {line_no}] inchikey mismatch :: {about} :: {inchikey_choices}")
158			elif len(inchi_choices) > 1:
159			logger.debug(f"[line {line_no}] inchi mismatch :: {about} :: {inchi_choices}")
160			return dict(
161			inchi=inchi,
162			inchikey=inchikey,
163			chembl_id=look(chembl_x, "id"),
164			chembl_inchi=look(chembl_x, "inchi"),
165			chembl_inchikey=look(chembl_x, "inchikey"),
166			pubchem_id=look(pubchem_x, "id"),
167			pubchem_inchi=look(pubchem_x, "inchi"),
168			pubchem_inchikey=look(pubchem_x, "inchikey"),
169			)
170
171			def _choose(
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
172			self,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
173			db_to_struct: Mapping[str, CompoundStruct],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
174			what: str,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
175			) -> Tuple[Optional[str], Dict[str, Db]]:
176			"""
177			Chooses the best what="inchi" or what="inchikey".
178
179			Arguments:
180			db_to_struct: Should be in order from most preferred to least
181			what: The name of the CompoundStruct attribute to access
182			"""
183			options = {o.db: look(o, what) for o in db_to_struct.values() if look(o, what) is not None}
184			_s = ", ".join([f"{k}={v}" for k, v in options.items()])
185			non_input_dbs = {v: k for k, v in options.items() if k != "input"}
186			all_uniques = set(options.values())
187			if len(all_uniques) == 0:
			0 ignored issues – show unused-code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unnecessary "else" after "return" Loading history...
188			return None, {}
189			else:
190			return list(all_uniques)[0], non_input_dbs
191
192			def _prep(self, df: IdMatchFrame) -> IdMatchFrame:
			0 ignored issues – show Coding Style Naming introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Argument name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
193			bad_cols = [c for c in df.columns if c.startswith("origin_")]
194			if len(bad_cols) > 0:
195			raise ValueError(f"Columns {', '.join(bad_cols)} start with 'origin_'")
196			rename_cols = {c: "origin_" + c for c in FILL_IDS if c in df.columns}
197			if len(rename_cols) > 0:
198			logger.notice(f"Renaming columns: {', '.join(rename_cols.keys())}")
199			df: IdMatchFrame = df.rename(columns=rename_cols)
200			drop_cols = {c for c in df.columns if df[c].isnull().all()}
201			if len(drop_cols):
			0 ignored issues – show Unused Code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty Loading history...
202			logger.warning(f"Dropping empty columns: {', '.join(drop_cols)}")
203			df = df.drop_cols(drop_cols)
204			return df
205
206			def _get_pubchem(self, inchikey: Optional[str], cid: Optional[int]) -> Optional[CompoundStruct]:
			0 ignored issues – show Unused Code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Either all return statements in a function should return an expression, or none of them should. Loading history... Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
207			api = Apis.Pubchem
208			if cid is not None:
209			# let it raise a CompoundNotFoundError
210			inchikey = api.fetch_data(int(cid)).names_and_identifiers.inchikey
211			if inchikey is None:
212			return None
213			if inchikey is not None:
214			try:
215			data: Optional[PubchemData] = api.fetch_data(inchikey)
216			except CompoundNotFoundError:
217			return None
218			return None if data is None else data.struct_view
219
220			def _get_chembl(self, inchikey: Optional[str], cid: Optional[str]) -> Optional[CompoundStruct]:
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
221			util = ChemblUtils(Apis.Chembl)
222			if cid is not None:
223			# let it raise a CompoundNotFoundError
224			return util.get_compound(cid).struct_view
225			try:
226			return util.get_compound(inchikey).struct_view
227			except CompoundNotFoundError:
228			return None
229
230
231			__all__ = ["CompoundIdFiller", "IdMatchFrame"]
232

dmyersturnbull / mandos

Push — main ( 9813db...5006f2 )

CompoundIdFiller._get_pubchem() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like