Passed
Push — main ( 2e1b6b...3a0c28 )
by Douglas
02:06
created

mandos.model.hit_dfs   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 79
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 63
dl 0
loc 79
rs 10
c 0
b 0
f 0
wmc 11

2 Functions

Rating   Name   Duplication   Size   Complexity  
A _from_hits() 0 11 3
B _to_hits() 0 29 8
1
from typing import Sequence
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
4
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
5
from pocketutils.core.exceptions import InjectionError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
6
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
7
from typeddfs.abs_dfs import AbsDf
0 ignored issues
show
introduced by
Unable to import 'typeddfs.abs_dfs'
Loading history...
8
9
from mandos.model.concrete_hits import HIT_CLASSES
10
from mandos.model.hits import AbstractHit
11
from mandos.model.utils.setup import logger
12
13
14
def _from_hits(cls, hits: Sequence[AbstractHit]) -> AbsDf:
    """
    Build a data frame of type ``cls`` from a sequence of hits.

    Each hit becomes one row holding every attribute named by its class's
    ``fields()``, plus ``universal_id`` and ``hit_class``.

    Args:
        cls: The data frame class; this function calls ``cls.new_df()``
             and ``cls.of()`` on it.
        hits: The hits to convert; may be empty.

    Returns:
        ``cls.new_df()`` if ``hits`` is empty; otherwise the result of
        ``cls.of`` applied to a list with one ``pd.Series`` per hit.
    """
    # Explicit length check (rather than truthiness) so that any sized
    # container is handled the same way as before.
    if len(hits) == 0:
        logger.debug("No hits")
        return cls.new_df()
    rows = []
    for hit in hits:
        record = {name: getattr(hit, name) for name in hit.__class__.fields()}
        # These two are read directly from the hit and override any
        # same-named entry that came from fields().
        record["universal_id"] = hit.universal_id
        record["hit_class"] = hit.hit_class
        rows.append(pd.Series(record))
    return cls.of(rows)
25
26
27
def _to_hits(self: AbsDf) -> Sequence[AbstractHit]:
    """
    Convert every row of this data frame into a hit object.

    The class for each row is looked up in ``HIT_CLASSES`` using the row's
    ``hit_class`` value. Only the columns named by that class's ``fields()``
    are passed to its constructor; extra columns are ignored.

    Returns:
        A list with one hit per row, in iteration order.

    Raises:
        InjectionError: If a row's hit class name is not in ``HIT_CLASSES``,
            or if the hit class constructor raises ``ValueError``.
        AttributeError: If a row lacks a column named by the class's
            ``fields()``; this is raised by ``getattr`` and not converted.
    """
    # TODO: remove
    # Rows whose hit_class is "_DrugbankInteractionHit" are remapped to a
    # concrete class name chosen by data_source (presumably for data written
    # by an older version -- confirm before removing).
    drugbank_renames = {
        "drugbank:target-functions": "DrugbankGeneralFunctionHit",
        "drugbank:targets": "DrugbankTargetHit",
    }
    hits = []
    for row in self.itertuples():
        # noinspection PyUnresolvedReferences
        class_name = row.hit_class
        if class_name == "_DrugbankInteractionHit":
            # Unrecognized data sources keep the original name and will fail
            # the lookup below unless HIT_CLASSES contains it.
            class_name = drugbank_renames.get(row.data_source, class_name)
        try:
            clazz = HIT_CLASSES[class_name]
        except KeyError:
            raise InjectionError(f"No hit class {class_name}") from None
        # Materialize once so the names can be reused in the error message
        # even if fields() returns a one-shot iterable.
        field_names = list(clazz.fields())
        # Extra columns are ignored; a missing column raises AttributeError here.
        data = {name: getattr(row, name) for name in field_names}
        try:
            # noinspection PyArgumentList
            hit = clazz(**data)
        except ValueError as e:
            logger.debug(f"Data passed to {clazz}: {data}")
            # Report the frame's actual columns: the keys of ``data`` are by
            # construction identical to the expected fields, so listing them
            # would never reveal a mismatch.
            raise InjectionError(
                f"Fields for {class_name} do not match:"
                f" expected {', '.join(field_names)};"
                f" got {', '.join(str(col) for col in self.columns)}"
            ) from e
        hits.append(hit)
    return hits
56
57
58
# Typed data frame class for hits, assembled with the typeddfs builder.
# NOTE(review): the exact semantics of require/reserve/strict/secure are
# defined by typeddfs and are not visible here -- confirm against its docs.
HitDf = (
    TypedDfs.typed("HitDf")
    # Required string columns.
    .require("record_id", dtype=str)
    .require("origin_inchikey", "matched_inchikey", dtype=str)
    .require("predicate", dtype=str)
    .require("object_id", "object_name", dtype=str)
    .require("search_key", "search_class", "data_source", dtype=str)
    # hit_class is the key that to_hits() looks up in HIT_CLASSES.
    .require("hit_class", dtype=str)
    # Required, but declared without a dtype.
    .require("cache_date", "run_date")
    # Reserved columns (presumably optional, typed when present -- confirm).
    .reserve("inchi", "smiles", dtype=str)
    .reserve("compound_id", "compound_name", dtype=str)
    .reserve("chembl_id", "pubchem_id", dtype=str)
    .reserve("weight", dtype=np.float64)
    # Expose the module-level converters as HitDf.from_hits(...) and df.to_hits().
    .add_classmethods(from_hits=_from_hits)
    .add_methods(to_hits=_to_hits)
    # cols=False presumably permits columns beyond those declared above,
    # consistent with to_hits() ignoring extra columns -- confirm.
    .strict(cols=False)
    .secure()
).build()
76
77
78
__all__ = ["HitDf"]
79