Passed
Push — main ( 4b9dc0...1b55d1 )
by Douglas
06:16 queued 02:32
created

QueryingChemblScrapeApi.By()   A

Complexity

Conditions 1

Size

Total Lines 5
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 4
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
"""
2
API that web-scrapes ChEMBL.
3
"""
4
from __future__ import annotations
5
6
import abc
7
import enum
8
from functools import cached_property
0 ignored issues
show
Bug introduced by
The name cached_property does not seem to exist in module functools.
Loading history...
9
from pathlib import Path
10
from typing import Optional, Type
11
12
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
13
from pocketutils.core.enums import CleverEnum
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.enums'
Loading history...
14
from pocketutils.core.query_utils import QueryExecutor
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.query_utils'
Loading history...
15
from typeddfs import TypedDf, TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
16
17
from mandos.model import Api
18
from mandos.model.settings import QUERY_EXECUTORS, SETTINGS
19
from mandos.model.utils.setup import logger
20
21
22
class SarPredictionResult(CleverEnum):
    """
    Outcome of a target prediction.

    Used as the dtype of the ``confidence_70``, ``confidence_80``, and
    ``confidence_90`` columns of ``ChemblTargetPredictionTable``.
    """

    active = enum.auto()
    inactive = enum.auto()
    empty = enum.auto()
    both = enum.auto()

    @property
    def yes_no_mixed(self) -> str:
        """
        Returns ``"yes"`` for ``active``, ``"no"`` for ``inactive``,
        and ``"mixed"`` for both ``empty`` and ``both``.
        """
        labels = {
            "active": "yes",
            "inactive": "no",
            "empty": "mixed",
            "both": "mixed",
        }
        return labels[self.name]

    @property
    def score(self) -> int:
        """
        Returns ``1`` for ``active``, ``-1`` for ``inactive``,
        and ``0`` for both ``empty`` and ``both``.
        """
        scores = {
            "active": 1,
            "inactive": -1,
            "empty": 0,
            "both": 0,
        }
        return scores[self.name]
45
46
47
class ChemblScrapeTable(TypedDf, metaclass=abc.ABCMeta):
    """
    Abstract base class for typed DataFrames that hold a scraped ChEMBL page.

    Concrete table types (such as ``ChemblTargetPredictionTable``) are built
    as subclasses of this class and are passed as ``table_type`` to
    ``ChemblScrapeApi._fetch_page``.
    """
49
50
51
def _parse_conf(df: pd.DataFrame) -> pd.DataFrame:
    """
    Converts the confidence columns of a scraped table to ``SarPredictionResult``.

    Registered as the post-processing hook of ``ChemblTargetPredictionTable``.

    Args:
        df: A DataFrame containing the columns ``confidence_70``,
            ``confidence_80``, and ``confidence_90``.

    Returns:
        A copy of ``df`` whose three confidence columns have been mapped
        through ``SarPredictionResult.of``; the input is not modified.
    """
    df = df.copy()
    for threshold in (70, 80, 90):
        column = f"confidence_{threshold}"
        df[column] = df[column].map(SarPredictionResult.of)
    # The converted copy must be returned; otherwise the conversion is discarded
    # and the caller never sees the parsed values.
    return df
55
56
57
# Typed DataFrame for the "target predictions" page of a ChEMBL compound report card.
ChemblTargetPredictionTable = (
    TypedDfs.typed("ChemblTargetPredictionTable")
    .subclass(ChemblScrapeTable)
    # Identification of the predicted target.
    .require("target_chembl_id", "target_pref_name", "target_organism", dtype=str)
    # One prediction outcome per confidence level.
    .require("confidence_70", "confidence_80", "confidence_90", dtype=SarPredictionResult)
    .require("activity_threshold", dtype=float)
    # Post-processing hook that parses the confidence columns.
    .post(_parse_conf)
    # See the typeddfs builder documentation for the exact semantics of the options below.
    .strict()
    .secure()
    .hash(directory=True)
    .build()
)
68
69
70
class ChemblScrapePage(CleverEnum):
    """
    Pages of a ChEMBL compound report card that can be scraped.

    The member is interpolated into the report-card URL and its ``name``
    is used as a cache subdirectory.
    """

    target_predictions = enum.auto()
72
73
74
class ChemblScrapeApi(Api, metaclass=abc.ABCMeta):
    """
    Abstract API for data scraped from ChEMBL compound report cards.

    Subclasses supply ``_fetch_page``.
    """

    def fetch_predictions(self, cid: str) -> ChemblTargetPredictionTable:
        """
        Fetches the target-predictions page for a compound.

        Args:
            cid: The compound ID, passed through to ``_fetch_page``.

        Returns:
            Whatever ``_fetch_page`` returns for the ``target_predictions``
            page with ``ChemblTargetPredictionTable`` as the table type.
        """
        page = ChemblScrapePage.target_predictions
        return self._fetch_page(cid, page, ChemblTargetPredictionTable)

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        """
        Fetches one page for one compound; must be overridden.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
82
83
84
class QueryingChemblScrapeApi(ChemblScrapeApi):
    """
    Scrapes ChEMBL compound report-card pages with a ``Scraper``.

    The ``Scraper`` and ``By`` classes are imported lazily from
    ``mandos.model.utils.scrape`` the first time they are needed.
    """

    def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.chembl):
        """
        Args:
            executor: Passed to ``Scraper.create`` whenever a scraper is requested.
        """
        self._executor = executor

    @property
    def scraper(self):
        """
        Calls ``Scraper.create`` with this instance's executor on every access.
        """
        return self.Scraper.create(self._executor)

    @cached_property
    def By(self):
        """
        The ``By`` class from ``mandos.model.utils.scrape``, imported on first access.
        """
        from mandos.model.utils.scrape import By

        return By

    @cached_property
    def Scraper(self):
        """
        The ``Scraper`` class from ``mandos.model.utils.scrape``, imported on first access.
        """
        from mandos.model.utils.scrape import Scraper

        return Scraper

    def _fetch_page(
        self, chembl_id: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]
    ):
        """
        Scrapes the table on one embedded report-card page.

        Args:
            chembl_id: The compound ID interpolated into the URL.
            page: The page to scrape; interpolated into the URL.
            table_type: The typed DataFrame class used to build the result.

        Returns:
            ``table_type.of`` applied to a DataFrame whose columns are the cells
            of the first scraped row and whose data are the remaining rows.

        Raises:
            IndexError: If no table rows were scraped.
        """
        url = f"https://www.ebi.ac.uk/chembl/embed/#compound_report_card/{chembl_id}/{page}"
        scraper = self.scraper
        scraper.go(url)
        rows = []
        next_page = 2
        while True:
            table = scraper.find_element("table", self.By.TAG_NAME)
            for tr in table.find_elements("tr"):
                # One list per table row; extending ``rows`` with the cells directly
                # would flatten every cell of the table into a single list.
                rows.append([td.text.strip() for td in tr.find_elements("td")])
            # Stop when there is no link labeled with the next page number.
            # noinspection PyBroadException
            try:
                links = scraper.find_elements(str(next_page), self.By.LINK_TEXT)
            except Exception:
                break
            if not links:
                # A lookup that returns nothing instead of raising would otherwise loop forever.
                break
            # NOTE(review): the link is located but never followed, so each iteration
            # re-reads the same table. Following it needs the Scraper API -- TODO confirm.
            next_page += 1
        # NOTE(review): assumes the first row's ``td`` cells are the column names -- TODO confirm.
        header = rows[0]
        body = rows[1:]
        return table_type.of(pd.DataFrame(body, columns=header))
125
126
127
class CachingChemblScrapeApi(ChemblScrapeApi):
    """
    Serves scraped ChEMBL pages from files in a cache directory.

    Pages that are not cached are fetched from a ``QueryingChemblScrapeApi``
    (if one was given) and written to the cache.
    """

    def __init__(
        self,
        query: Optional[QueryingChemblScrapeApi],
        cache_dir: Path = SETTINGS.chembl_cache_path,
    ):
        """
        Args:
            query: The API used for pages missing from the cache,
                or ``None`` to never scrape.
            cache_dir: The directory under which cached pages are stored.
        """
        self._cache_dir = cache_dir
        self._query = query

    def _fetch_page(self, cid: str, page: ChemblScrapePage, table_type: Type[ChemblScrapeTable]):
        """
        Returns the page from the cache, scraping and caching it if needed.

        Args:
            cid: The compound ID.
            page: The page to fetch.
            table_type: The typed DataFrame class to read or create.

        Returns:
            The cached table if the cache file exists; an empty ``table_type``
            if it does not and no querying API was given; otherwise the freshly
            scraped table, after writing it to the cache file.
        """
        path = self.path(cid, page)
        if path.exists():
            # Read with the requested type rather than the abstract base so the
            # result matches what the caller asked for.
            return table_type.read_file(path)
        if self._query is None:
            return table_type.new_empty()
        data: TypedDf = self._query._fetch_page(cid, page, table_type)
        data.write_file(path.resolve(), mkdirs=True)
        logger.debug(f"Scraped page {page} for {cid} with {len(data):,} rows")
        return data

    def path(self, cid: str, page: ChemblScrapePage):
        """
        Returns the cache file path for a compound and page:
        ``<cache_dir>/<page name>/<cid>`` with the configured archive suffix.
        """
        return (self._cache_dir / page.name / cid).with_suffix(SETTINGS.archive_filename_suffix)
149
150
151
# Public API of this module. "ChemblScrapePage" was previously listed twice;
# the duplicate entry now exports the public base table class instead.
__all__ = [
    "ChemblScrapeApi",
    "ChemblScrapePage",
    "ChemblScrapeTable",
    "ChemblTargetPredictionTable",
    "QueryingChemblScrapeApi",
    "CachingChemblScrapeApi",
]
159