Passed
Push — main ( 9813db...5006f2 )
by Douglas
01:43
created

mandos.model.apis.chembl_scrape_api   A

Complexity

Total Complexity 18

Size/Duplication

Total Lines 150
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 111
dl 0
loc 150
rs 10
c 0
b 0
f 0
wmc 18

10 Methods

Rating   Name   Duplication   Size   Complexity  
A CachingChemblScrapeApi._fetch_page() 0 10 3
A _ScraperSingleton.get() 0 5 2
A QueryingChemblScrapeApi._fetch_page() 0 21 4
A ChemblScrapeApi._fetch_page() 0 2 1
A QueryingChemblScrapeApi.__init__() 0 2 1
A ChemblScrapeApi.fetch_predictions() 0 3 1
A CachingChemblScrapeApi.__init__() 0 7 1
A CachingChemblScrapeApi.path() 0 3 1
A SarPredictionResult.yes_no_mixed() 0 8 1
A SarPredictionResult.score() 0 8 1

1 Function

Rating   Name   Duplication   Size   Complexity  
A _parse_conf() 0 4 2
1
"""
2
API that web-scrapes ChEMBL.
3
"""
4
from __future__ import annotations
5
6
import abc
7
import enum
8
from pathlib import Path
9
from typing import Optional, Type
10
11
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
12
from pocketutils.core.query_utils import QueryExecutor
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.query_utils'
Loading history...
13
from typeddfs import TypedDfs, TypedDf
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
14
15
from mandos.model import Api
16
from mandos.model.utils import CleverEnum
17
from mandos.model.settings import QUERY_EXECUTORS, MANDOS_SETTINGS
18
from mandos.model.utils.scrape import Scraper, By
19
20
21
class SarPredictionResult(CleverEnum):
    """
    Value of a ``confidence_*`` column in the ChEMBL target prediction table.
    """

    active = enum.auto()
    inactive = enum.auto()
    empty = enum.auto()
    both = enum.auto()

    @property
    def yes_no_mixed(self) -> str:
        """
        Describes this result as a word.

        Returns:
            "yes" for ``active``, "no" for ``inactive``, and "mixed" for ``empty`` or ``both``
        """
        if self is SarPredictionResult.active:
            return "yes"
        if self is SarPredictionResult.inactive:
            return "no"
        # ``empty`` and ``both`` are reported the same way
        return "mixed"

    @property
    def score(self) -> int:
        """
        Describes this result as a signed integer.

        Returns:
            1 for ``active``, -1 for ``inactive``, and 0 for ``empty`` or ``both``
        """
        if self is SarPredictionResult.active:
            return 1
        if self is SarPredictionResult.inactive:
            return -1
        # ``empty`` and ``both`` are reported the same way
        return 0
44
45
46
class ChemblScrapeTable(TypedDf, metaclass=abc.ABCMeta):
    """
    Abstract base class for typed DataFrames that hold tables scraped from ChEMBL pages.
    """
48
49
50
def _parse_conf(df: pd.DataFrame) -> pd.DataFrame:
    """
    Converts the ``confidence_70``, ``confidence_80``, and ``confidence_90`` columns
    to ``SarPredictionResult`` values.

    Args:
        df: A DataFrame containing the three confidence columns; it is not modified

    Returns:
        A copy of ``df`` with every confidence value mapped through ``SarPredictionResult.of``
    """
    df = df.copy()
    for threshold in (70, 80, 90):
        column = f"confidence_{threshold}"
        df[column] = df[column].map(SarPredictionResult.of)
    # The conversion is applied to a copy, so the copy must be returned to have any effect
    return df
54
55
56
# Typed DataFrame class for the ChEMBL target predictions table.
# The built object is used as a class elsewhere in this module (``table_type.of(...)``),
# so it is annotated as ``Type[...]`` rather than as an instance.
ChemblTargetPredictionTable: Type[ChemblScrapeTable] = (
    TypedDfs.typed("ChemblTargetPredictionTable")
    .subclass(ChemblScrapeTable)
    .require("target_chembl_id", "target_pref_name", "target_organism", dtype=str)
    .require("confidence_70", "confidence_80", "confidence_90", dtype=SarPredictionResult)
    .require("activity_threshold", dtype=float)
    .post(_parse_conf)
).build()
64
65
66
class ChemblScrapePage(CleverEnum):
    """
    A page of a ChEMBL compound report card that can be scraped.
    """

    target_predictions = enum.auto()
68
69
70
class _ScraperSingleton:
    """
    Holds a single ``Scraper`` that is created lazily and then shared.
    """

    # The shared scraper, or None if it has not been created yet
    x = None

    @classmethod
    def get(cls, executor: QueryExecutor):
        """
        Returns the shared scraper, creating it on the first call.

        Args:
            executor: Passed to ``Scraper.create``; only used if no scraper exists yet

        Returns:
            The scraper stored in ``x``
        """
        scraper = cls.x
        if scraper is None:
            scraper = Scraper.create(executor)
            cls.x = scraper
        return scraper
78
79
80
class ChemblScrapeApi(Api, metaclass=abc.ABCMeta):
    """
    Base class for APIs that return tables taken from ChEMBL pages.
    Subclasses implement ``_fetch_page``.
    """

    def fetch_predictions(self, cid: str) -> ChemblTargetPredictionTable:
        """
        Fetches the target predictions table for a compound.

        Args:
            cid: The compound ID, passed through to ``_fetch_page``

        Returns:
            Whatever ``_fetch_page`` returns for the ``target_predictions`` page
        """
        page = ChemblScrapePage.target_predictions
        # Positional on purpose: subclasses do not all name the first parameter ``cid``
        return self._fetch_page(cid, page, ChemblTargetPredictionTable)

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        """
        Fetches one page for a compound as a table of type ``table_type``.

        Raises:
            NotImplementedError: Always; subclasses must override this method
        """
        raise NotImplementedError()
88
89
90
class QueryingChemblScrapeApi(ChemblScrapeApi):
    """
    Fetches ChEMBL compound report card pages by scraping them with a ``Scraper``.
    """

    def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.chembl):
        """
        Args:
            executor: Handed to ``_ScraperSingleton.get`` whenever a page is fetched
        """
        self._executor = executor

    def _fetch_page(
        self, chembl_id: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]
    ):
        """
        Scrapes every page of the table shown for a compound.

        Args:
            chembl_id: The compound ID inserted into the report card URL
            page: The report card page inserted into the URL
            table_type: The typed DataFrame class used to wrap the scraped rows

        Returns:
            ``table_type.of`` applied to a DataFrame whose columns are the first scraped row
            and whose data are the remaining rows

        Raises:
            IndexError: If no table rows were scraped
        """
        url = f"https://www.ebi.ac.uk/chembl/embed/#compound_report_card/{chembl_id}/{page}"
        scraper = _ScraperSingleton.get(self._executor)
        scraper.go(url)
        rows = []
        next_page = 2
        while True:
            table = scraper.find_element("table", By.TAG_NAME)
            for tr in table.find_elements("tr"):
                # One list per table row; extending ``rows`` with the cells would flatten
                # the whole table into a single list of strings
                rows.append([td.text.strip() for td in tr.find_elements("td")])
            # The pagination links are labeled with page numbers
            # noinspection PyBroadException
            try:
                links = scraper.find_elements(str(next_page), By.LINK_TEXT)
            except Exception:
                break
            if not links:
                # No link for the next page number means this was the last page
                break
            # NOTE(review): assumes elements expose a Selenium-style click() -- confirm
            # against Scraper; without following the link the same page is read again
            links[0].click()
            next_page += 1
        # NOTE(review): the first row is used as the header, but only <td> cells are
        # collected -- confirm that the header row does not use <th> cells
        header = rows[0]
        rows = rows[1:]
        return table_type.of(pd.DataFrame(rows, columns=header))
115
116
117
class CachingChemblScrapeApi(ChemblScrapeApi):
    """
    Reads scraped ChEMBL tables from a cache directory,
    and falls back to a querying API (if one was given) when a table is not cached.
    """

    def __init__(
        self,
        query: Optional[QueryingChemblScrapeApi],
        cache_dir: Path = MANDOS_SETTINGS.chembl_cache_path,
    ):
        """
        Args:
            query: The API used on a cache miss; if None, a cache miss yields an empty table
            cache_dir: The directory under which cached tables are stored
        """
        self._cache_dir = cache_dir
        self._query = query

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        """
        Returns the cached table if its file exists; otherwise queries, caches, and returns it.

        Args:
            cid: The compound ID
            page: The page to fetch
            table_type: The typed DataFrame class to read or create

        Returns:
            An instance of ``table_type`` on a cache hit or when no query API is set;
            otherwise whatever the query API returns
        """
        path = self.path(cid, page)
        if path.exists():
            # Read with the requested type so a cache hit matches what a query returns
            return table_type.read_file(path)
        if self._query is None:
            return table_type.new_empty()
        # noinspection PyProtectedMember
        data = self._query._fetch_page(cid, page, table_type)
        path.parent.mkdir(exist_ok=True, parents=True)
        data.write_file(path)
        return data

    def path(self, cid: str, page: ChemblScrapePage) -> Path:
        """
        Returns the cache file path for a compound and page.

        Args:
            cid: The compound ID, used as the file name stem
            page: The page, whose name is used as the subdirectory

        Returns:
            ``cache_dir / page.name / cid`` with the suffix set to
            ``MANDOS_SETTINGS.archive_filename_suffix``
        """
        stem = self._cache_dir / page.name / cid
        return stem.with_suffix(MANDOS_SETTINGS.archive_filename_suffix)
141
142
143
# Public names of this module.
# "ChemblScrapePage" was listed twice; the second entry is now "ChemblScrapeTable".
__all__ = [
    "ChemblScrapeApi",
    "ChemblScrapePage",
    "ChemblScrapeTable",
    "ChemblTargetPredictionTable",
    "QueryingChemblScrapeApi",
    "CachingChemblScrapeApi",
]
151