1
|
|
|
"""
Definitions of input types for analysis.
"""
4
|
|
|
|
5
|
|
|
import math |
|
|
|
|
6
|
|
|
from collections import defaultdict |
|
|
|
|
7
|
|
|
from itertools import cycle |
|
|
|
|
8
|
|
|
from typing import Collection, Dict, Sequence, Set, Tuple, Union, Optional |
|
|
|
|
9
|
|
|
|
10
|
|
|
import numpy as np |
|
|
|
|
11
|
|
|
import pandas as pd |
|
|
|
|
12
|
|
|
from typeddfs import TypedDfs |
|
|
|
|
13
|
|
|
|
14
|
|
|
|
15
|
|
|
def _to_long_form(self, kind: str, key: str): |
16
|
|
|
if kind not in ["phi", "psi"]: |
17
|
|
|
raise ValueError(f"'type' should be 'phi' or 'psi', not {kind}") |
18
|
|
|
df = self.long_form() |
|
|
|
|
19
|
|
|
df = df.rename(columns=dict(row="inchikey_1", column="inchikey_2")) |
|
|
|
|
20
|
|
|
df["type"] = kind |
21
|
|
|
df["key"] = key |
22
|
|
|
return SimilarityDfLongForm.convert(df) |
23
|
|
|
|
24
|
|
|
|
25
|
|
|
# Long-form similarity table (the type returned by ``_to_long_form``).
# Required columns: ``inchikey_1``, ``inchikey_2``, ``type``, ``key`` (str)
# and ``value`` (float). ``sample`` (int) is reserved, and the type is
# declared ``strict()``.
SimilarityDfLongForm = (
    TypedDfs.typed("SimilarityDfLongForm")
    .require("inchikey_1", "inchikey_2", dtype=str)
    .require("type", "key", dtype=str)
    .require("value", dtype=float)
    .reserve("sample", dtype=int)
    .strict()
    .build()
)
33
|
|
|
|
34
|
|
|
# Short-form (affinity-matrix) similarity table. ``_to_long_form`` is attached
# as the ``to_long_form`` method of the built class.
SimilarityDfShortForm = (
    TypedDfs.affinity_matrix("SimilarityDfShortForm")
    .add_methods(to_long_form=_to_long_form)
    .build()
)
37
|
|
|
|
38
|
|
|
# Score table. Required columns: ``inchikey`` and ``score_name`` (str),
# ``score_value`` (float).
# NOTE(review): the class is registered as "InputScoreFrame", not "ScoreDf";
# the name is kept as-is because it is visible at runtime.
ScoreDf = (
    TypedDfs.typed("InputScoreFrame")
    .require("inchikey", "score_name", dtype=str)
    .require("score_value", dtype=float)
    .build()
)
43
|
|
|
|
44
|
|
|
|
45
|
|
|
# Enrichment table. Required columns: ``predicate``, ``object``, ``key``,
# ``source``, ``score_name`` (str) and ``value``, ``inverse`` (float).
# ``sample`` (int) is reserved.
# NOTE(review): the class is registered as "EnrichmentFrame", not
# "EnrichmentDf"; the name is kept as-is because it is visible at runtime.
EnrichmentDf = (
    TypedDfs.typed("EnrichmentFrame")
    .require("predicate", "object", "key", "source", dtype=str)
    .require("score_name", dtype=str)
    .require("value", "inverse", dtype=float)
    .reserve("sample", dtype=int)
    .build()
)
52
|
|
|
|
53
|
|
|
|
54
|
|
|
# Concordance table. Required columns: ``phi`` and ``psi`` (str), ``tau``
# (float). ``sample`` (int) is reserved.
ConcordanceDf = (
    TypedDfs.typed("ConcordanceDf")
    .require("phi", "psi", dtype=str)
    .require("tau", dtype=float)
    .reserve("sample", dtype=int)
    .build()
)
60
|
|
|
|
61
|
|
|
|
62
|
|
|
# Projection table. Required columns: ``psi`` and ``inchikey`` (str),
# ``x`` and ``y`` (float). ``color`` and ``marker`` (str) are reserved.
PsiProjectedDf = (
    TypedDfs.typed("PsiProjectedDf")
    .require("psi", dtype=str)
    .require("inchikey", dtype=str)
    .require("x", "y", dtype=float)
    .reserve("color", "marker", dtype=str)
    .build()
)
69
|
|
|
|