1
|
|
|
"""Manage optional GO-DAG attributes.""" |
2
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved." |
4
|
|
|
__author__ = "DV Klopfenstein" |
5
|
|
|
|
6
|
|
|
import re |
7
|
|
|
import collections as cx |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
class OboOptionalAttrs(object):
    """Manage optional GO-DAG attributes.

    An instance is built with the collection of optional attribute names to
    load. It then (1) creates the matching data members on a record
    (init_datamembers) and (2) fills them from individual obo lines
    (update_rec). Lines whose tag was not requested are ignored.
    """

    # Every optional attribute name a user may request.
    # 'def' is the obo field name; 'defn' is its Python-legal alias.
    attributes = set(['def', 'defn', 'synonym', 'relationship', 'xref', 'subset', 'comment'])

    def __init__(self, optional_attrs):
        """Store the requested attribute names and compile the patterns they need.

        optional_attrs must be truthy (a non-empty collection of names);
        an empty or None value trips the assertion below.
        """
        assert optional_attrs
        self.optional_attrs = optional_attrs
        self.attr2cmp = self._init_compile_patterns(optional_attrs)

    def update_rec(self, rec, line):
        """Update current GOTerm with optional record.

        rec  -- record object previously prepared by init_datamembers
        line -- one obo line, e.g. 'synonym: "text" EXACT []'

        Only the first matching tag is handled; a line whose tag is not in
        self.optional_attrs leaves rec untouched.
        """
        wanted = self.optional_attrs
        if 'def' in wanted and line.startswith("def: "):
            assert not hasattr(rec, 'defn'), "ATTR(defn) ALREADY SET({VAL})".format(VAL=rec.defn)
            # Use 'defn' because 'def' is a reserved word in python
            rec.defn = line[5:]
        elif 'synonym' in wanted and line.startswith("synonym: "):
            rec.synonym.append(self._get_synonym(line[9:]))
        # http://geneontology.org/page/ontology-relations
        elif 'relationship' in wanted and line.startswith("relationship: "):
            # relationships are stored in a dict of sets, mirroring
            # the structure implied in the GO DAG. Example:
            #
            #  relationship = {
            #     'part_of': set(['GO:0021513', 'GO:0006310']),
            #     'regulates': set(['GO:0006313']),
            #     'negatively_regulates': set(['GO:0021910']),
            #     'positively_regulates': set(['GO:0006313']),
            # }
            # Only the first two whitespace-separated tokens are used;
            # anything after them (e.g. a trailing '! name') is dropped.
            rel, goid = line[14:].split()[:2]
            rec.relationship.setdefault(rel, set()).add(goid)
        elif 'xref' in wanted and line.startswith("xref: "):
            rec.xref.add(self._get_xref(line[6:]))
        elif 'subset' in wanted and line.startswith("subset: "):
            rec.subset.add(line[8:])
        elif 'comment' in wanted and line.startswith("comment: "):
            rec.comment = line[9:]

    def init_datamembers(self, rec):
        """Initialize current GOTerm with data members for storing optional attributes.

        'def' deliberately gets no initial value: update_rec relies on
        rec.defn being absent to detect a second definition line.
        """
        wanted = self.optional_attrs
        if 'synonym' in wanted:
            rec.synonym = []
        if 'xref' in wanted:
            rec.xref = set()
        if 'subset' in wanted:
            rec.subset = set()
        if 'comment' in wanted:
            rec.comment = ""
        if 'relationship' in wanted:
            rec.relationship = {}
            rec.relationship_rev = {}

    def _get_synonym(self, line):
        """Given line, return optional attribute synonym value in a namedtuple.

        The namedtuple fields are: text, scope, typename, dbxrefs (a set).
        A line that does not match the synonym pattern is not handled here:
        the match is None and the attribute access below raises AttributeError.
        """
        # Example synonyms:
        # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr]
        # "regulation of postsynaptic cytosolic calcium levels" EXACT syngo_official_label []
        # "tocopherol 13-hydroxylase activity" EXACT systematic_synonym []
        mtch = self.attr2cmp['synonym'].match(line)
        text, scope, typename, dbxrefs, _ = mtch.groups()
        typename = typename.strip()
        # An empty '[]' yields an empty set rather than set([''])
        dbxrefs = set(dbxrefs.split(', ')) if dbxrefs else set()
        return self.attr2cmp['synonym nt']._make([text, scope, typename, dbxrefs])

    def _get_xref(self, line):
        """Given line, return the xref identifier as a string with spaces removed.

        Any text following the identifier (such as a quoted description)
        is discarded.
        """
        # Ex: Wikipedia:Zygotene
        # Ex: Reactome:REACT_22295 "Addition of a third mannose to ..."
        mtch = self.attr2cmp['xref'].match(line)
        return mtch.group(1).replace(' ', '')

    @staticmethod
    def _init_compile_patterns(optional_attrs):
        """Compile search patterns for optional attributes if needed.

        Returns a dict with keys 'synonym' and 'synonym nt' (compiled regex
        and result namedtuple) when 'synonym' is requested, and 'xref'
        (compiled regex) when 'xref' is requested. Returns an empty dict
        when optional_attrs is None.
        """
        attr2cmp = {}
        if optional_attrs is None:
            return attr2cmp
        # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr]
        # "blood vessel formation from pre-existing blood vessels" EXACT systematic_synonym []
        # "mitochondrial inheritance" EXACT []
        # "tricarboxylate transport protein" RELATED [] {comment="WIkipedia:Mitochondrial_carrier"}
        if 'synonym' in optional_attrs:
            # The quoted text may be a single non-space character, so the
            # '.*\S' tail is optional; longer texts match exactly as before.
            attr2cmp['synonym'] = re.compile(r'"(\S(?:.*\S)?)" ([A-Z]+) (.*)\[(.*)\](.*)$')
            attr2cmp['synonym nt'] = cx.namedtuple("synonym", "text scope typename dbxrefs")
        # Wikipedia:Zygotene
        # Reactome:REACT_27267 "DHAP from Ery4P and PEP, Mycobacterium tuberculosis"
        if 'xref' in optional_attrs:
            attr2cmp['xref'] = re.compile(r'^(\S+:\s*\S+)\b(.*)$')
        return attr2cmp

    @staticmethod
    def get_optional_attrs(optional_attrs):
        """Prepare to store data from user-desired optional fields.

        Not loading these optional fields by default saves in space and speed.
        But allow the possibility for saving these fields, if the user desires,
        Including:
          comment consider def is_class_level is_metadata_tag is_transitive
          relationship replaced_by subset synonym transitive_over xref

        optional_attrs may be a single attribute name (str), an iterable of
        names, or None. Names not in OboOptionalAttrs.attributes are dropped.

        Returns a set of recognized names with 'defn' normalized to 'def',
        or None when nothing recognized was requested.
        """
        # Nothing requested: same result as requesting only unknown names
        if optional_attrs is None:
            return None
        # Required attributes are always loaded. All others are optionally loaded.
        attrs_opt = OboOptionalAttrs.attributes
        # A lone string is one attribute name, not an iterable of characters
        if isinstance(optional_attrs, str):
            requested = [optional_attrs]
        else:
            requested = optional_attrs
        # Allow user to specify either: 'def' or 'defn'
        #   'def' is an obo field name, but 'defn' is legal Python attribute name
        opts = set(['def' if name == 'defn' else name
                    for name in requested if name in attrs_opt])
        return opts if opts else None
123
|
|
|
|
124
|
|
|
|
125
|
|
|
# Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved. |
126
|
|
|
|