"""
API that web-scrapes ChEMBL.
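
Example (a sketch; assumes default settings and a real compound ChEMBL ID, here aspirin's)::

    api = CachingChemblScrapeApi(QueryingChemblScrapeApi())
    predictions = api.fetch_predictions("CHEMBL25")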
"""

from __future__ import annotations

import abc
import enum
from functools import cached_property
from pathlib import Path
from typing import Optional, Type

import pandas as pd
from pocketutils.core.enums import CleverEnum
from pocketutils.core.query_utils import QueryExecutor
from typeddfs import TypedDf, TypedDfs

from mandos.model import Api
from mandos.model.settings import QUERY_EXECUTORS, SETTINGS
from mandos.model.utils.setup import logger


class SarPredictionResult(CleverEnum):
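    """
    Outcome of a scraped ChEMBL SAR target prediction.

    ``empty`` and ``both`` are treated as neutral by :meth:`yes_no_mixed` and :meth:`score`.
    """
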
    active = enum.auto()
    inactive = enum.auto()
    empty = enum.auto()
    both = enum.auto()

    @property
    def yes_no_mixed(self) -> str:
        return {
            SarPredictionResult.active: "yes",
            SarPredictionResult.inactive: "no",
            SarPredictionResult.empty: "mixed",
            SarPredictionResult.both: "mixed",
        }[self]

    @property
    def score(self) -> int:
        return {
            SarPredictionResult.active: 1,
            SarPredictionResult.inactive: -1,
            SarPredictionResult.empty: 0,
            SarPredictionResult.both: 0,
        }[self]


class ChemblScrapeTable(TypedDf, metaclass=abc.ABCMeta):
    """
    A typed table backing one scraped ChEMBL page.
    """


def _parse_conf(df: pd.DataFrame) -> pd.DataFrame:
    # Convert the raw text in the confidence columns to SarPredictionResult members
    df = df.copy()
    for t in [70, 80, 90]:
        df[f"confidence_{t}"] = df[f"confidence_{t}"].map(SarPredictionResult.of)
    return df


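# Typed table of per-target SAR predictions; strict columns, secured, and hashed per directory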
ChemblTargetPredictionTable = (
    TypedDfs.typed("ChemblTargetPredictionTable")
    .subclass(ChemblScrapeTable)
    .require("target_chembl_id", "target_pref_name", "target_organism", dtype=str)
    .require("confidence_70", "confidence_80", "confidence_90", dtype=SarPredictionResult)
    .require("activity_threshold", dtype=float)
    .post(_parse_conf)
    .strict()
    .secure()
    .hash(directory=True)
).build()


class ChemblScrapePage(CleverEnum):
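    """
    A scrapable page of the ChEMBL compound report card.
    """
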
    target_predictions = enum.auto()


class ChemblScrapeApi(Api, metaclass=abc.ABCMeta):
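    """
    Fetches scraped ChEMBL pages as typed tables.
    """
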
    def fetch_predictions(self, cid: str) -> ChemblTargetPredictionTable:
        return self._fetch_page(
            cid, ChemblScrapePage.target_predictions, ChemblTargetPredictionTable
        )

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        raise NotImplementedError()


class QueryingChemblScrapeApi(ChemblScrapeApi):
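    """
    Scrapes pages directly from the ChEMBL website.
    """
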
    def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.chembl):
        self._executor = executor

    @property
    def scraper(self):
        return self.Scraper.create(self._executor)

    @cached_property
    def By(self):
        # Imported lazily so the scraping dependencies are needed only when actually used
        from mandos.model.utils.scrape import By

        return By

    @cached_property
    def Scraper(self):
        from mandos.model.utils.scrape import Scraper

        return Scraper

    def _fetch_page(
        self, chembl_id: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]
    ):
        url = f"https://www.ebi.ac.uk/chembl/embed/#compound_report_card/{chembl_id}/{page.name}"
        scraper = self.scraper
        scraper.go(url)
        rows = []
        i = 2
        while True:
            table = scraper.find_element("table", self.By.TAG_NAME)
            for tr in table.find_elements("tr"):
                # One list of cell texts per row, so the DataFrame gets proper columns
                rows.append([td.text.strip() for td in tr.find_elements("td")])
            # noinspection PyBroadException
            try:
                # Click the pagination link labeled "2", "3", ...; stop when none exists
                # (assumes find_element returns a clickable element, e.g. a Selenium WebElement)
                scraper.find_element(str(i), self.By.LINK_TEXT).click()
            except Exception:
                break
            i += 1
        # The first collected row is taken as the header
        header = rows[0]
        rows = rows[1:]
        return table_type.of(pd.DataFrame(rows, columns=header))


class CachingChemblScrapeApi(ChemblScrapeApi):
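    """
    Reads cached tables from disk, delegating to a :class:`QueryingChemblScrapeApi` on a miss.
    """
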
    def __init__(
        self,
        query: Optional[QueryingChemblScrapeApi],
        cache_dir: Path = SETTINGS.chembl_cache_path,
    ):
        self._cache_dir = cache_dir
        self._query = query

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        path = self.path(cid, page)
        if path.exists():
            # Read with the concrete table type, not the abstract ChemblScrapeTable
            return table_type.read_file(path)
        elif self._query is None:
            return table_type.new_empty()
        data: TypedDf = self._query._fetch_page(cid, page, table_type)
        data.write_file(path.resolve(), mkdirs=True)
        logger.debug(f"Scraped page {page} for {cid} with {len(data):,} rows")
        return data

    def path(self, cid: str, page: ChemblScrapePage) -> Path:
        return (self._cache_dir / page.name / cid).with_suffix(SETTINGS.archive_filename_suffix)


__all__ = [
    "ChemblScrapeApi",
    "ChemblScrapePage",
    "ChemblScrapeTable",
    "ChemblTargetPredictionTable",
    "QueryingChemblScrapeApi",
    "CachingChemblScrapeApi",
    "SarPredictionResult",
]