mandos.model.apis.chembl_scrape_api - Code Metrics - Inspection of "feat: add prediction search; improve taxa" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( 9813db...5006f2 )

by Douglas

created 2021-08-07 00:13 UTC

mandos.model.apis.chembl_scrape_api A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	150
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	111
dl	0
loc	150
rs	10
c	0
b	0
f	0
wmc	18

10 Methods

Rating	Name	Size	Complexity
A	CachingChemblScrapeApi._fetch_page()	10	3
A	_ScraperSingleton.get()	5	2
A	QueryingChemblScrapeApi._fetch_page()	21	4
A	ChemblScrapeApi._fetch_page()	2	1
A	QueryingChemblScrapeApi.__init__()	2	1
A	ChemblScrapeApi.fetch_predictions()	3	1
A	CachingChemblScrapeApi.__init__()	7	1
A	CachingChemblScrapeApi.path()	3	1
A	SarPredictionResult.yes_no_mixed()	8	1
A	SarPredictionResult.score()	8	1

1 Function

Rating	Name	Duplication	Size	Complexity
A	_parse_conf()	0	4	2

"""
API that web-scrapes ChEMBL.
"""
from __future__ import annotations

import abc
import enum
from pathlib import Path
from typing import Optional, Type

import pandas as pd

from pocketutils.core.query_utils import QueryExecutor

from typeddfs import TypedDfs, TypedDf


from mandos.model import Api
from mandos.model.utils import CleverEnum
from mandos.model.settings import QUERY_EXECUTORS, MANDOS_SETTINGS
from mandos.model.utils.scrape import Scraper, By


class SarPredictionResult(CleverEnum):

    active = enum.auto()
    inactive = enum.auto()
    empty = enum.auto()
    both = enum.auto()

    @property
    def yes_no_mixed(self) -> str:

        return {
            SarPredictionResult.active: "yes",
            SarPredictionResult.inactive: "no",
            SarPredictionResult.empty: "mixed",
            SarPredictionResult.both: "mixed",
        }[self]

    @property
    def score(self) -> int:

        return {
            SarPredictionResult.active: 1,
            SarPredictionResult.inactive: -1,
            SarPredictionResult.empty: 0,
            SarPredictionResult.both: 0,
        }[self]


class ChemblScrapeTable(TypedDf, metaclass=abc.ABCMeta):

    """"""


def _parse_conf(df: pd.DataFrame):

    df = df.copy()
    for t in [70, 80, 90]:

        df[f"confidence_{t}"] = df[f"confidence_{t}"].map(SarPredictionResult.of)


ChemblTargetPredictionTable: TypedDf = (
    TypedDfs.typed("ChemblTargetPredictionTable")
    .subclass(ChemblScrapeTable)
    .require("target_chembl_id", "target_pref_name", "target_organism", dtype=str)
    .require("confidence_70", "confidence_80", "confidence_90", dtype=SarPredictionResult)
    .require("activity_threshold", dtype=float)
    .post(_parse_conf)
).build()


class ChemblScrapePage(CleverEnum):

    target_predictions = enum.auto()


class _ScraperSingleton:
    x = None

    @classmethod
    def get(cls, executor: QueryExecutor):

        if cls.x is None:
            cls.x = Scraper.create(executor)
        return cls.x


class ChemblScrapeApi(Api, metaclass=abc.ABCMeta):

    def fetch_predictions(self, cid: str) -> ChemblTargetPredictionTable:

        return self._fetch_page(
            cid, ChemblScrapePage.target_predictions, ChemblTargetPredictionTable
        )

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        raise NotImplementedError()


class QueryingChemblScrapeApi(ChemblScrapeApi):

    def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.chembl):
        self._executor = executor

    def _fetch_page(

        self, chembl_id: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]

    ):
        url = f"https://www.ebi.ac.uk/chembl/embed/#compound_report_card/{chembl_id}/{page}"
        scraper = _ScraperSingleton.get(self._executor)
        scraper.go(url)
        rows = []
        i = 2
        while True:
            table = scraper.find_element("table", By.TAG_NAME)
            for tr in table.find_elements("tr"):

                rows += [td.text.strip() for td in tr.find_elements("td")]
            # noinspection PyBroadException
            try:
                scraper.find_elements(str(i), By.LINK_TEXT)
            except Exception:

                break
            i += 1
        header = rows[0]
        rows = rows[1:]
        return table_type.of(pd.DataFrame(rows, columns=header))


class CachingChemblScrapeApi(ChemblScrapeApi):

    def __init__(
        self,

        query: Optional[QueryingChemblScrapeApi],

        cache_dir: Path = MANDOS_SETTINGS.chembl_cache_path,

    ):
        self._cache_dir = cache_dir
        self._query = query

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        path = self.path(cid, page)
        if path.exists():

            return ChemblScrapeTable.read_file(path)
        elif self._query is None:
            return ChemblScrapeTable.new_empty()
        data = self._query._fetch_page(cid, page, table_type)
class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
        path.parent.mkdir(exist_ok=True, parents=True)
        data.write_file(path)
        return data

    def path(self, cid: str, page: ChemblScrapePage):

        return (self._cache_dir / page.name / cid).with_suffix(
            MANDOS_SETTINGS.archive_filename_suffix
        )


__all__ = [
    "ChemblScrapeApi",
    "ChemblScrapePage",
    "ChemblScrapePage",
    "ChemblTargetPredictionTable",
    "QueryingChemblScrapeApi",
    "CachingChemblScrapeApi",
]


1			"""
2			API that web-scrapes ChEMBL.
3			"""
4			from __future__ import annotations
5
6			import abc
7			import enum
8			from pathlib import Path
9			from typing import Optional, Type
10
11			import pandas as pd
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'pandas' Loading history...
12			from pocketutils.core.query_utils import QueryExecutor
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.core.query_utils' Loading history...
13			from typeddfs import TypedDfs, TypedDf
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs' Loading history...
14
15			from mandos.model import Api
16			from mandos.model.utils import CleverEnum
17			from mandos.model.settings import QUERY_EXECUTORS, MANDOS_SETTINGS
18			from mandos.model.utils.scrape import Scraper, By
19
20
21			class SarPredictionResult(CleverEnum):
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
22			active = enum.auto()
23			inactive = enum.auto()
24			empty = enum.auto()
25			both = enum.auto()
26
27			@property
28			def yes_no_mixed(self) -> str:
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
29			return {
30			SarPredictionResult.active: "yes",
31			SarPredictionResult.inactive: "no",
32			SarPredictionResult.empty: "mixed",
33			SarPredictionResult.both: "mixed",
34			}[self]
35
36			@property
37			def score(self) -> int:
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
38			return {
39			SarPredictionResult.active: 1,
40			SarPredictionResult.inactive: -1,
41			SarPredictionResult.empty: 0,
42			SarPredictionResult.both: 0,
43			}[self]
44
45
46			class ChemblScrapeTable(TypedDf, metaclass=abc.ABCMeta):
			0 ignored issues – show Documentation introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Empty class docstring Loading history...
47			""""""
48
49
50			def _parse_conf(df: pd.DataFrame):
			0 ignored issues – show Coding Style Naming introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Argument name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
51			df = df.copy()
52			for t in [70, 80, 90]:
			0 ignored issues – show Coding Style Naming introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Variable name "t" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
53			df[f"confidence_{t}"] = df[f"confidence_{t}"].map(SarPredictionResult.of)
54
55
56			ChemblTargetPredictionTable: TypedDf = (
57			TypedDfs.typed("ChemblTargetPredictionTable")
58			.subclass(ChemblScrapeTable)
59			.require("target_chembl_id", "target_pref_name", "target_organism", dtype=str)
60			.require("confidence_70", "confidence_80", "confidence_90", dtype=SarPredictionResult)
61			.require("activity_threshold", dtype=float)
62			.post(_parse_conf)
63			).build()
64
65
66			class ChemblScrapePage(CleverEnum):
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
67			target_predictions = enum.auto()
68
69
70			class _ScraperSingleton:
71			x = None
72
73			@classmethod
74			def get(cls, executor: QueryExecutor):
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
75			if cls.x is None:
76			cls.x = Scraper.create(executor)
77			return cls.x
78
79
80			class ChemblScrapeApi(Api, metaclass=abc.ABCMeta):
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
81			def fetch_predictions(self, cid: str) -> ChemblTargetPredictionTable:
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
82			return self._fetch_page(
83			cid, ChemblScrapePage.target_predictions, ChemblTargetPredictionTable
84			)
85
86			def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
87			raise NotImplementedError()
88
89
90			class QueryingChemblScrapeApi(ChemblScrapeApi):
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
91			def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.chembl):
92			self._executor = executor
93
94			def _fetch_page(
			0 ignored issues – show Bug introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Parameters differ from overridden '_fetch_page' method Loading history...
95			self, chembl_id: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
96			):
97			url = f"https://www.ebi.ac.uk/chembl/embed/#compound_report_card/{chembl_id}/{page}"
98			scraper = _ScraperSingleton.get(self._executor)
99			scraper.go(url)
100			rows = []
101			i = 2
102			while True:
103			table = scraper.find_element("table", By.TAG_NAME)
104			for tr in table.find_elements("tr"):
			0 ignored issues – show Coding Style Naming introduced 2021-08-02 23:39 UTC by Report Bug Copy Issue Report Variable name "tr" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
105			rows += [td.text.strip() for td in tr.find_elements("td")]
106			# noinspection PyBroadException
107			try:
108			scraper.find_elements(str(i), By.LINK_TEXT)
109			except Exception:
			0 ignored issues – show Best Practice introduced 2021-08-02 23:39 UTC by Report Bug Copy Issue Report Catching very general exceptions such as `Exception` is usually not recommended. Generally, you would want to handle very specific errors in the exception handler. This ensure that you do not hide other types of errors which should be fixed. So, unless you specifically plan to handle any error, consider adding a more specific exception. Loading history...
110			break
111			i += 1
112			header = rows[0]
113			rows = rows[1:]
114			return table_type.of(pd.DataFrame(rows, columns=header))
115
116
117			class CachingChemblScrapeApi(ChemblScrapeApi):
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
118			def __init__(
119			self,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
120			query: Optional[QueryingChemblScrapeApi],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
121			cache_dir: Path = MANDOS_SETTINGS.chembl_cache_path,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122			):
123			self._cache_dir = cache_dir
124			self._query = query
125
126			def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
127			path = self.path(cid, page)
128			if path.exists():
			0 ignored issues – show unused-code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unnecessary "elif" after "return" Loading history...
129			return ChemblScrapeTable.read_file(path)
130			elif self._query is None:
131			return ChemblScrapeTable.new_empty()
132			data = self._query._fetch_page(cid, page, table_type)
			0 ignored issues – show Coding Style Best Practice introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report It seems like `_fetch_page` was declared protected and should not be accessed from this context. Prefixing a member variable `_` is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class: class MyParent: def __init__(self): self._x = 1; self.y = 2; class MyChild(MyParent): def some_method(self): return self._x # Ok, since accessed from a child class class AnotherClass: def some_method(self, instance_of_my_child): return instance_of_my_child._x # Would be flagged as AnotherClass is not # a child class of MyParent Loading history...
133			path.parent.mkdir(exist_ok=True, parents=True)
134			data.write_file(path)
135			return data
136
137			def path(self, cid: str, page: ChemblScrapePage):
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
138			return (self._cache_dir / page.name / cid).with_suffix(
139			MANDOS_SETTINGS.archive_filename_suffix
140			)
141
142
143			__all__ = [
144			"ChemblScrapeApi",
145			"ChemblScrapePage",
146			"ChemblScrapePage",
147			"ChemblTargetPredictionTable",
148			"QueryingChemblScrapeApi",
149			"CachingChemblScrapeApi",
150			]
151

dmyersturnbull / mandos

Push — main ( 9813db...5006f2 )

mandos.model.apis.chembl_scrape_api A

Complexity

Size/Duplication

Importance

10 Methods

1 Function

Duplication Side-by-Side

Filter issues like