Total Complexity | 40 |
Total Lines | 154 |
Duplicated Lines | 12.99 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like mandos.search.pubchem.drugbank_ddi_search often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | import re |
||
|
|||
2 | from dataclasses import dataclass |
||
3 | from typing import Optional, Sequence, Tuple |
||
4 | |||
5 | from loguru import logger |
||
6 | |||
7 | from mandos.model import MiscUtils |
||
8 | from mandos.model.apis.pubchem_support.pubchem_models import DrugbankDdi |
||
9 | from mandos.search.pubchem import PubchemHit, PubchemSearch |
||
10 | |||
11 | |||
@dataclass(frozen=True, order=True, repr=True)
class DrugbankDdiHit(PubchemHit):
    """
    A single DrugBank drug/drug interaction hit for a compound.

    Adds the fields guessed from the interaction's free-text description
    to those inherited from ``PubchemHit``.
    """

    # Interaction category guessed from the description
    # (e.g. "PK", "risk", "activity", or "efficacy").
    type: str
    # What the interaction acts on, as extracted from the description
    # (e.g. the adverse effect, activity, or PK property); None if not extracted.
    effect_target: Optional[str]
    # Direction guessed from the description: "increase", "decrease", or "change".
    change: Optional[str]
    # The original free-text description of the interaction.
    description: str
||
20 | |||
21 | |||
class DrugbankDdiSearch(PubchemSearch[DrugbankDdiHit]):
    """
    Search for DrugBank drug/drug interactions (DDIs) recorded for a compound.

    Each DDI carries a free-text description. The interaction kind, its
    direction, and the affected effect/target are guessed from that text
    using keyword and regex heuristics; interactions for which nothing can
    be extracted are logged and skipped.
    """

    # (keyword, kind) pairs checked in order; the first keyword found in the
    # description decides the kind.
    _KIND_KEYWORDS = (
        ("serum concentration", "PK"),
        ("metabolism", "PK"),
        ("absorption", "PK"),
        ("excretion", "PK"),
        ("risk", "risk"),
        ("severity", "risk"),
        ("adverse", "risk"),
        ("activities", "activity"),
        ("activity", "activity"),
        ("efficacy", "efficacy"),
    )

    # Patterns are compiled once here rather than on every call.
    _RE_MAY_INCREASE = re.compile(r"may increase the (.+)")
    _RE_MAY_DECREASE = re.compile(r"may decrease the (.+)")
    _RE_EFFICACY_OF = re.compile(r"efficacy of (.+)")
    _RE_RISK_OR_SEVERITY_OF = re.compile(r" risk or severity of (.+)")
    _RE_RISK_OF = re.compile(r" risk of (.+)")
    _RE_THE = re.compile(r"^The (.+)")
    _RE_CAN = re.compile(r" can")
    _RE_CAN_BE = re.compile(r"can be")
    # The alternation is grouped so that it means " can be" or " may be".
    # Ungrouped (" (?:can)|(?:may) be") it meant " can" OR "may be", which
    # also split inside words such as " cancer".
    _RE_CAN_OR_MAY_BE = re.compile(r" (?:can|may) be")
    _RE_ACTIVITIES = re.compile(r"activities")
    _RE_WHICH = re.compile(r"which")

    @property
    def data_source(self) -> str:
        """Return the human-readable name of the data source."""
        return "DrugBank :: drug/drug interactions"

    def find(self, inchikey: str) -> Sequence[DrugbankDdiHit]:
        """
        Find the DrugBank DDIs for a compound.

        Args:
            inchikey: The InChIKey passed to ``self.api.fetch_data``.

        Returns:
            One hit per interaction whose description could be interpreted.
            Interactions for which ``_guess_predicate`` returns ``None``
            are skipped.
        """
        data = self.api.fetch_data(inchikey)
        hits = []
        for dd in data.biomolecular_interactions_and_pathways.drugbank_ddis:
            kind = self._guess_type(dd.description)
            up_or_down = self._guess_up_down(dd.description)
            guessed = self._guess_predicate(dd, kind, up_or_down)
            if guessed is None:
                # Already logged by _guess_predicate. Unpacking None here
                # would raise TypeError and abort the whole search.
                continue
            spec, predicate, statement = guessed
            hits.append(
                self._create_hit(
                    inchikey=inchikey,
                    c_id=str(data.cid),
                    c_origin=inchikey,
                    c_matched=data.names_and_identifiers.inchikey,
                    c_name=data.name,
                    predicate=predicate,
                    statement=statement,
                    object_id=dd.drug_drugbank_id,
                    # NOTE(review): the object name is set to the DrugBank ID,
                    # the same value as object_id -- confirm this is intended.
                    object_name=dd.drug_drugbank_id,
                    type=kind,
                    effect_target=spec,
                    change=up_or_down,
                    description=dd.description,
                )
            )
        return hits

    def _guess_predicate(
        self, dd: DrugbankDdi, kind: str, up_or_down: str
    ) -> Optional[Tuple[str, str, str]]:
        """
        Build the ``(spec, predicate, statement)`` triple for an interaction.

        Args:
            dd: The interaction; only its ``description`` is read.
            kind: The interaction kind, as returned by ``_guess_type``.
            up_or_down: The direction, as returned by ``_guess_up_down``.

        Returns:
            The triple, or ``None`` (after logging) if ``kind`` is not one of
            "risk", "activity", "PK", "efficacy" or if no spec was extracted.
        """
        spec, predicate, statement = None, None, None
        if kind == "risk":
            spec = self._guess_adverse(dd.description)
            predicate = f"interaction:{kind}:risk:{up_or_down}:{spec}"
            statement = f"{kind} :: {up_or_down}s risk of {spec} with"
        elif kind == "activity":
            spec = self._guess_activity(dd.description)
            predicate = f"interaction:{kind}:activity:{up_or_down}:{spec}"
            statement = f"{kind} :: {up_or_down}s {spec} activity with"
        elif kind == "PK":
            spec = self._guess_pk(dd.description)
            predicate = f"interaction:{kind}:pk:{up_or_down}:{spec}"
            statement = f"{kind} :: {up_or_down}s {spec} with"
        elif kind == "efficacy":
            spec = self._guess_efficacy(dd.description)
            predicate = f"interaction:{kind}:efficacy:{up_or_down}:{spec}"
            statement = f"{kind} :: {up_or_down}s efficacy of {spec} with"
        if spec is None:
            logger.info(f"Did not extract info from '{dd.description}'")
            return None
        return spec, predicate, statement

    def _guess_up_down(self, desc: str) -> str:
        """Return "increase", "decrease", or "change" based on words in ``desc``."""
        if "increase" in desc:
            return "increase"
        if "decrease" in desc:
            return "decrease"
        return "change"

    @staticmethod
    def _first_capture(desc: str, *patterns) -> Optional[str]:
        """Return group 1 of the first pattern that matches ``desc``, else None."""
        for pattern in patterns:
            match = pattern.search(desc)
            if match is not None:
                return match.group(1)
        return None

    @staticmethod
    def _before_split(text: str, pattern) -> Optional[str]:
        """
        Split ``text`` on ``pattern`` and return the stripped first part.

        Returns None unless the split yields exactly two parts, i.e. the
        pattern occurs exactly once.
        """
        parts = pattern.split(text)
        if len(parts) != 2:
            return None
        return parts[0].strip()

    def _guess_efficacy(self, desc: str) -> Optional[str]:
        """Extract the text between "efficacy of " and a single " can"."""
        tail = self._first_capture(desc, self._RE_EFFICACY_OF)
        if tail is None:
            return None
        return self._before_split(tail, self._RE_CAN)

    def _guess_activity(self, desc: str) -> Optional[str]:
        """Extract the text between "may increase/decrease the " and "activities"."""
        tail = self._first_capture(desc, self._RE_MAY_INCREASE, self._RE_MAY_DECREASE)
        if tail is None:
            return None
        return self._before_split(tail, self._RE_ACTIVITIES)

    def _guess_adverse(self, desc: str) -> Optional[str]:
        """
        Extract the adverse effect named after " risk or severity of " or
        " risk of ", up to a single " can be" / " may be".
        """
        tail = self._first_capture(desc, self._RE_RISK_OR_SEVERITY_OF, self._RE_RISK_OF)
        if tail is None:
            return None
        return self._before_split(tail, self._RE_CAN_OR_MAY_BE)

    def _guess_pk(self, desc: str) -> Optional[str]:
        """
        Extract the pharmacokinetic property mentioned in ``desc``.

        First tries the form "The <spec> can be ..."; if that fails, tries
        "may increase/decrease the <spec> which ...".
        """
        subject = self._first_capture(desc, self._RE_THE)
        if subject is not None:
            spec = self._before_split(subject, self._RE_CAN_BE)
            if spec is not None:
                return spec
        # try another way
        tail = self._first_capture(desc, self._RE_MAY_INCREASE, self._RE_MAY_DECREASE)
        if tail is None:
            return None
        return self._before_split(tail, self._RE_WHICH)

    def _guess_type(self, desc: str) -> str:
        """Return the kind for the first known keyword found in ``desc``, else "unknown"."""
        for keyword, kind in self._KIND_KEYWORDS:
            if keyword in desc:
                return kind
        return "unknown"
||
151 | |||
152 | |||
# Names exported by `from <module> import *`.
__all__ = ["DrugbankDdiHit", "DrugbankDdiSearch"]
||
154 |