Passed
Push — main ( ec3fe3...82dd22 )
by Douglas
02:00
created

mandos.model.hits.HitUtils.hits_to_df()   A

Complexity

Conditions 2

Size

Total Lines 9
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 9
nop 2
dl 0
loc 9
rs 9.95
c 0
b 0
f 0
1
import dataclasses
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import html
3
from dataclasses import dataclass
4
from datetime import datetime
5
from typing import Optional, Sequence
6
7
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
8
9
# Immutable set of the four types str, int, float, and datetime.
# Presumably the value types permitted for hit fields; nothing in this module enforces it.
HIT_FIELD_TYPE = frozenset((str, int, float, datetime))
10
11
12
@dataclass(order=True, repr=True, frozen=True)
class KeyPredObj:
    """
    An immutable (predicate, object, key) record.

    Instances compare and sort field by field in declaration order:
    ``pred``, then ``obj``, then ``key``.
    """

    pred: str
    obj: str
    key: str
21
22
23
@dataclass(order=True, repr=True, frozen=True)
class KeyPredObjSource:
    """
    An immutable (predicate, object, key, source) record.

    Instances compare and sort field by field in declaration order.
    """

    pred: str
    obj: str
    key: str
    source: str

    @property
    def to_key_pred_obj(self) -> KeyPredObj:
        """
        Returns a ``KeyPredObj`` carrying this record's ``pred``, ``obj``, and ``key``.
        The ``source`` is not carried over.
        """
        return KeyPredObj(pred=self.pred, obj=self.obj, key=self.key)
37
38
39
@dataclass(order=True, repr=True, frozen=True)
class Triple:
    """
    Usually compound, predicate, object. Also includes the search key, if meaningful.
    """

    sub: str
    pred: str
    obj: str
    key: Optional[str]

    @property
    def n_triples(self) -> str:
        """
        Returns a simple text statement in n-triples format.
        When the key is not ``None``, the predicate is written as ``key:pred``.
        The predicate and object are HTML-escaped (quotes included).
        """
        raw_predicate = self.pred if self.key is None else self.key + ":" + self.pred
        predicate = html.escape(raw_predicate, quote=True)
        obj = html.escape(self.obj, quote=True)
        # The subject is written verbatim; only the predicate and object are escaped.
        return f'"{self.sub}" "{predicate}" "{obj}" .'
63
64
65
@dataclass(frozen=True, order=True, repr=True)
class AbstractHit:
    """
    An abstract annotation (statement type), which may support additional fields.

    Instances are immutable. Equality and ordering are the dataclass-generated
    field-by-field comparisons; hashing is overridden to use ``record_id`` only.
    """

    record_id: Optional[str]
    origin_inchikey: str
    matched_inchikey: str
    compound_id: str
    compound_name: str
    predicate: str
    object_id: str
    object_name: str
    weight: float
    search_key: str
    search_class: str
    data_source: str
    run_date: datetime
    cache_date: Optional[datetime]

    @property
    def hit_class(self) -> str:
        """
        Returns the name of this hit's concrete class.
        """
        return self.__class__.__name__

    @property
    def to_triple(self) -> Triple:
        """
        Returns a ``Triple`` of ``origin_inchikey``, ``predicate``, and ``object_name``,
        with ``search_key`` as the key.
        """
        return Triple(
            sub=self.origin_inchikey, pred=self.predicate, obj=self.object_name, key=self.search_key
        )

    @property
    def to_key_pred_obj(self) -> KeyPredObj:
        """
        Returns a ``KeyPredObj`` of ``predicate``, ``object_name``, and ``search_key``.
        """
        return KeyPredObj(pred=self.predicate, obj=self.object_name, key=self.search_key)

    @property
    def to_key_pred_obj_source(self) -> KeyPredObjSource:
        """
        Returns a ``KeyPredObjSource`` of ``predicate``, ``object_name``, ``search_key``,
        and ``data_source``.
        """
        return KeyPredObjSource(
            pred=self.predicate, obj=self.object_name, key=self.search_key, source=self.data_source
        )

    def __hash__(self):
        """
        Hashes on ``record_id`` alone.
        Hits that compare equal share a ``record_id``, so this stays consistent with ``__eq__``.
        """
        return hash(self.record_id)

    @property
    def universal_id(self) -> str:
        """
        Gets an identifier (a hex key) that uniquely identifies the record by its unique attributes.
        Does **NOT** distinguish between hits with duplicate information and does **NOT**
        include ``record_id``, ``origin_inchikey``, ``compound_name``, ``search_key``,
        or ``search_class``.

        The key is derived from a SHA-256 digest of the remaining field values, taken in
        field-declaration order. Unlike the builtin ``hash()``, which is salted per interpreter
        process for strings, the result is the same across runs, provided each value has a
        stable ``repr`` (true for ``str``, ``int``, ``float``, ``datetime``, and ``None``).

        Returns:
            A 16-character hexadecimal string
        """
        # Local import so this class does not depend on a change to the module's import block.
        import hashlib

        # excluding record_id only because it's not available for some hit types
        # we'd rather immediately see duplicates if they exist
        # TODO: cache instead
        excluded = {"record_id", "origin_inchikey", "compound_name", "search_key", "search_class"}
        # fields() returns a list in declaration order, so the tuple layout is deterministic
        values = tuple(getattr(self, name) for name in self.fields() if name not in excluded)
        digest = hashlib.sha256(repr(values).encode("utf-8")).hexdigest()
        return digest[:16]

    @classmethod
    def fields(cls) -> Sequence[str]:
        """
        Finds the list of fields in this class by reflection.

        Returns:
            The dataclass field names of ``cls``, in declaration order
        """
        return [f.name for f in dataclasses.fields(cls)]
138
139
140
# Typed data-frame class for tables of hits, assembled with the typeddfs fluent builder.
# Each ``require`` call presumably declares columns that must be present; ``dtype=str`` is
# passed for every column except ``cache_date`` and ``run_date``, which get no dtype.
# NOTE(review): ``weight`` is a field of AbstractHit but is not required here, while
# ``hit_class`` (a property on AbstractHit, not a field) is -- confirm this is intentional.
HitFrame = (
    TypedDfs.typed("HitFrame")
    .require("record_id", dtype=str)
    .require("origin_inchikey", "matched_inchikey", "compound_id", "compound_name", dtype=str)
    .require("predicate", dtype=str)
    .require("object_id", "object_name", dtype=str)
    .require("search_key", "search_class", "data_source", dtype=str)
    .require("hit_class", dtype=str)
    .require("cache_date", "run_date")
).build()
150
151
# Names exported by a star-import of this module.
__all__ = [
    "AbstractHit",
    "HitFrame",
    "KeyPredObj",
    "KeyPredObjSource",
    "Triple",
    "HIT_FIELD_TYPE",
]
152