|
1
|
|
|
"""Manage optional GO-DAG attributes.""" |
|
2
|
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved." |
|
4
|
|
|
__author__ = "DV Klopfenstein" |
|
5
|
|
|
|
|
6
|
|
|
import re |
|
7
|
|
|
import collections as cx |
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
class OboOptionalAttrs(object):
    """Manage optional GO-DAG attributes.

    An instance is configured with the set of optional obo field names to
    keep. It prepares the matching data members on a record
    (``init_datamembers``) and fills them from raw obo lines (``update_rec``).
    """

    # Field names a user may request. 'defn' is accepted as an alias of the
    # obo field 'def' because 'def' is a reserved word in Python.
    attributes = set(['def', 'defn', 'synonym', 'relationship', 'xref', 'subset', 'comment'])

    def __init__(self, optional_attrs):
        """Store the requested optional attributes and compile their patterns.

        optional_attrs: non-empty collection of obo field names, using 'def'
        (not 'defn') for definitions -- see ``get_optional_attrs``.
        """
        assert optional_attrs
        self.optional_attrs = optional_attrs
        self.attr2cmp = self._init_compile_patterns(optional_attrs)

    def update_rec(self, rec, line):
        """Update current GOTerm with optional record.

        rec:  record object; the data members for the requested attributes
              must already exist (see ``init_datamembers``).
        line: one obo line such as 'synonym: "..." EXACT []'.

        Lines whose field was not requested, or is not an optional field
        handled here, are ignored.
        """
        wanted = self.optional_attrs
        if 'def' in wanted and line.startswith("def: "):
            # A term may carry only one definition.
            assert not hasattr(rec, 'defn'), \
                "ATTR(defn) ALREADY SET({VAL})".format(VAL=rec.defn)
            # Use 'defn' because 'def' is a reserved word in python
            rec.defn = line[5:]
        elif 'synonym' in wanted and line.startswith("synonym: "):
            rec.synonym.append(self._get_synonym(line[9:]))
        # http://geneontology.org/page/ontology-relations
        elif 'relationship' in wanted and line.startswith("relationship: "):
            # relationships are stored in a dict of sets, mirroring
            # the structure implied in the GO DAG. Example:
            #
            #  relationship = {
            #     'part_of': set(['GO:0021513', 'GO:0006310']),
            #     'regulates': set(['GO:0006313']),
            #     'negatively_regulates': set(['GO:0021910']),
            #     'positively_regulates': set(['GO:0006313']),
            # }
            # Only the first two tokens are used; anything after the target
            # ID (e.g. a trailing '! name' remark) is dropped.
            rel, goid = line[14:].split()[:2]
            rec.relationship.setdefault(rel, set()).add(goid)
        elif 'xref' in wanted and line.startswith("xref: "):
            rec.xref.add(self._get_xref(line[6:]))
        elif 'subset' in wanted and line.startswith("subset: "):
            rec.subset.add(line[8:])
        elif 'comment' in wanted and line.startswith("comment: "):
            rec.comment = line[9:]

    def init_datamembers(self, rec):
        """Initialize current GOTerm with data members for storing optional attributes.

        'def' is deliberately not initialized here: ``update_rec`` relies on
        the absence of ``rec.defn`` to detect a duplicate definition.
        """
        wanted = self.optional_attrs
        if 'synonym' in wanted:
            rec.synonym = []
        if 'xref' in wanted:
            rec.xref = set()
        if 'subset' in wanted:
            rec.subset = set()
        if 'comment' in wanted:
            rec.comment = ""
        if 'relationship' in wanted:
            rec.relationship = {}
            # Created empty here; this class never fills it in.
            rec.relationship_rev = {}

    def _get_synonym(self, line):
        """Given line, return optional attribute synonym value in a namedtuple.

        line: the text following 'synonym: '.
        Returns a namedtuple with fields text, scope, typename and dbxrefs
        (dbxrefs is a set of strings, empty when the brackets are empty).
        Raises AttributeError if the line does not match the synonym pattern.
        """
        # Example synonyms:
        # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr]
        # "regulation of postsynaptic cytosolic calcium levels" EXACT syngo_official_label []
        # "tocopherol 13-hydroxylase activity" EXACT systematic_synonym []
        mtch = self.attr2cmp['synonym'].match(line)
        # The last group (text after the closing bracket) is not kept.
        text, scope, typename, dbxrefs, _ = mtch.groups()
        typename = typename.strip()
        dbxrefs = set(dbxrefs.split(', ')) if dbxrefs else set()
        return self.attr2cmp['synonym nt']._make([text, scope, typename, dbxrefs])

    def _get_xref(self, line):
        """Given line, return the xref identifier as a string with spaces removed.

        line: the text following 'xref: '.
        Any text after the identifier (e.g. a quoted description) is dropped.
        Raises AttributeError if the line does not match the xref pattern.
        """
        # Ex: Wikipedia:Zygotene
        # Ex: Reactome:REACT_22295 "Addition of a third mannose to ..."
        mtch = self.attr2cmp['xref'].match(line)
        return mtch.group(1).replace(' ', '')

    @staticmethod
    def _init_compile_patterns(optional_attrs):
        """Compile search patterns for optional attributes if needed.

        Returns a dict holding 'synonym' (compiled regex) and 'synonym nt'
        (namedtuple class) when 'synonym' is requested, and 'xref' (compiled
        regex) when 'xref' is requested. Returns an empty dict for None.
        """
        attr2cmp = {}
        if optional_attrs is None:
            return attr2cmp
        # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr]
        # "blood vessel formation from pre-existing blood vessels" EXACT systematic_synonym []
        # "mitochondrial inheritance" EXACT []
        # "tricarboxylate transport protein" RELATED [] {comment="WIkipedia:Mitochondrial_carrier"}
        if 'synonym' in optional_attrs:
            # The quoted text may be a single non-space character, so the
            # '.*\S' tail is optional.
            attr2cmp['synonym'] = re.compile(r'"(\S(?:.*\S)?)" ([A-Z]+) (.*)\[(.*)\](.*)$')
            attr2cmp['synonym nt'] = cx.namedtuple("synonym", "text scope typename dbxrefs")
        # Wikipedia:Zygotene
        # Reactome:REACT_27267 "DHAP from Ery4P and PEP, Mycobacterium tuberculosis"
        if 'xref' in optional_attrs:
            attr2cmp['xref'] = re.compile(r'^(\S+:\s*\S+)\b(.*)$')
        return attr2cmp

    @staticmethod
    def get_optional_attrs(optional_attrs):
        """Prepare to store data from user-desired optional fields.

        Not loading these optional fields by default saves in space and speed.
        But allow the possibility for saving these fields, if the user desires.

        optional_attrs: a single field name, an iterable of field names, or
        None. Names not listed in ``OboOptionalAttrs.attributes`` are ignored.

        Returns the set of recognized names, with 'defn' mapped to 'def', or
        None when nothing recognized was requested.
        """
        if optional_attrs is None:
            return None
        # Required attributes are always loaded. All others are optionally loaded.
        attrs_opt = OboOptionalAttrs.attributes
        # A string is one field name; a string that is not a known name can
        # never contribute anything, so treating it as a single candidate
        # gives the same (empty) result as scanning it.
        requested = [optional_attrs] if isinstance(optional_attrs, str) else optional_attrs
        # Allow user to specify either: 'def' or 'defn'
        #   'def' is an obo field name, but 'defn' is legal Python attribute name
        opts = set("def" if name == "defn" else name
                   for name in requested if name in attrs_opt)
        return opts if opts else None
|
123
|
|
|
|
|
124
|
|
|
|
|
125
|
|
|
# Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved. |
|
126
|
|
|
|