OboOptionalAttrs.update_rec()   F
last analyzed

Complexity

Conditions 15

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 15
dl 0
loc 30
rs 2.9998
c 2
b 0
f 0

How to fix   Complexity   

Complexity

Complex methods like OboOptionalAttrs.update_rec() often do a lot of different things. To break such a method down, we need to identify a cohesive component within its enclosing class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""Manage optional GO-DAG attributes."""
2
3
__copyright__ = "Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved."
4
__author__ = "DV Klopfenstein"
5
6
import re
7
import collections as cx
8
9
10
class OboOptionalAttrs(object):
    """Manage optional GO-DAG attributes.

    An instance is configured with the optional obo fields the user wants
    stored. `init_datamembers` prepares the containers on a record and
    `update_rec` fills them in from one obo line at a time.
    """

    # Names a user may request. Both 'def' (the obo tag) and 'defn' (the
    # attribute name used on the record) are accepted.
    attributes = set(['def', 'defn', 'synonym', 'relationship', 'xref', 'subset', 'comment'])

    # obo tag -> name of the private method that stores its value on the record
    _tag2handler = {
        'def': '_set_defn',
        'synonym': '_add_synonym',
        'relationship': '_add_relationship',
        'xref': '_add_xref',
        'subset': '_add_subset',
        'comment': '_set_comment',
    }

    def __init__(self, optional_attrs):
        """Store the requested optional attributes and compile needed patterns.

        Args:
            optional_attrs: non-empty collection of obo tag names to load.
        """
        assert optional_attrs
        self.optional_attrs = optional_attrs
        self.attr2cmp = self._init_compile_patterns(optional_attrs)

    def update_rec(self, rec, line):
        """Update current GOTerm with optional record.

        The line is split at its first ": " into an obo tag and a value. The
        value is stored on `rec` only if the tag is one this class handles
        and it was requested in `self.optional_attrs`; any other line is
        ignored.

        Args:
            rec: record being filled in; its containers must already have
                been created by `init_datamembers`.
            line: one line of an obo stanza, e.g. "subset: goslim_generic".
        """
        tag, sep, value = line.partition(": ")
        if not sep:
            return
        handler = self._tag2handler.get(tag)
        if handler is not None and tag in self.optional_attrs:
            getattr(self, handler)(rec, value)

    @staticmethod
    def _set_defn(rec, value):
        """Store the 'def' value; a record may carry only one definition."""
        assert not hasattr(rec, 'defn'), "ATTR(defn) ALREADY SET({VAL})".format(VAL=rec.defn)
        # Use 'defn' because 'def' is a reserved word in python
        rec.defn = value

    def _add_synonym(self, rec, value):
        """Append the parsed synonym namedtuple to the record's synonym list."""
        rec.synonym.append(self._get_synonym(value))

    @staticmethod
    def _add_relationship(rec, value):
        """Add a GO ID to the set stored under its relationship type."""
        # http://geneontology.org/page/ontology-relations
        # relationships are stored in a dict of sets, mirroring
        # the structure implied in the GO DAG. Example:
        #
        #  relationship = {
        #     'part_of': set(['GO:0021513', 'GO:0006310']),
        #     'regulates': set(['GO:0006313']),
        #     'negatively_regulates': set(['GO:0021910']),
        #     'positively_regulates': set(['GO:0006313']),
        # }
        # Only the first two whitespace-separated tokens are used; anything
        # after them on the line is dropped.
        rel, goid = value.split()[:2]
        rec.relationship.setdefault(rel, set()).add(goid)

    def _add_xref(self, rec, value):
        """Add the parsed xref string to the record's xref set."""
        rec.xref.add(self._get_xref(value))

    @staticmethod
    def _add_subset(rec, value):
        """Add the subset name to the record's subset set."""
        rec.subset.add(value)

    @staticmethod
    def _set_comment(rec, value):
        """Store the comment text, replacing any previous comment."""
        rec.comment = value

    def init_datamembers(self, rec):
        """Initialize current GOTerm with data members for storing optional attributes.

        Only containers for requested attributes are created. No 'defn'
        member is created here; `update_rec` sets it when a 'def' line is read.
        """
        requested = self.optional_attrs
        if 'synonym' in requested:
            rec.synonym = []
        if 'xref' in requested:
            rec.xref = set()
        if 'subset' in requested:
            rec.subset = set()
        if 'comment' in requested:
            rec.comment = ""
        if 'relationship' in requested:
            rec.relationship = {}
            rec.relationship_rev = {}

    def _get_synonym(self, line):
        """Given line, return optional attribute synonym value in a namedtuple.

        The namedtuple has fields: text, scope, typename, dbxrefs (a set).
        Raises AttributeError if `line` does not match the synonym pattern.
        """
        # Example synonyms:
        # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr]
        # "regulation of postsynaptic cytosolic calcium levels" EXACT syngo_official_label []
        # "tocopherol 13-hydroxylase activity" EXACT systematic_synonym []
        mtch = self.attr2cmp['synonym'].match(line)
        # The trailing group (text after the closing bracket) is not stored.
        text, scope, typename, dbxrefs, _ = mtch.groups()
        typename = typename.strip()
        dbxrefs = set(dbxrefs.split(', ')) if dbxrefs else set()
        return self.attr2cmp['synonym nt']._make([text, scope, typename, dbxrefs])

    def _get_xref(self, line):
        """Given line, return the xref identifier as a string with spaces removed.

        Raises AttributeError if `line` does not match the xref pattern.
        """
        # Ex: Wikipedia:Zygotene
        # Ex: Reactome:REACT_22295 "Addition of a third mannose to ..."
        mtch = self.attr2cmp['xref'].match(line)
        return mtch.group(1).replace(' ', '')

    @staticmethod
    def _init_compile_patterns(optional_attrs):
        """Compile search patterns for optional attributes if needed.

        Returns a dict which may hold the keys 'synonym' (compiled regex),
        'synonym nt' (namedtuple class) and 'xref' (compiled regex),
        depending on which attributes were requested.
        """
        attr2cmp = {}
        if optional_attrs is None:
            return attr2cmp
        # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr]
        # "blood vessel formation from pre-existing blood vessels" EXACT systematic_synonym []
        # "mitochondrial inheritance" EXACT []
        # "tricarboxylate transport protein" RELATED [] {comment="WIkipedia:Mitochondrial_carrier"}
        if 'synonym' in optional_attrs:
            attr2cmp['synonym'] = re.compile(r'"(\S.*\S)" ([A-Z]+) (.*)\[(.*)\](.*)$')
            attr2cmp['synonym nt'] = cx.namedtuple("synonym", "text scope typename dbxrefs")
        # Wikipedia:Zygotene
        # Reactome:REACT_27267 "DHAP from Ery4P and PEP, Mycobacterium tuberculosis"
        if 'xref' in optional_attrs:
            attr2cmp['xref'] = re.compile(r'^(\S+:\s*\S+)\b(.*)$')
        return attr2cmp

    @staticmethod
    def get_optional_attrs(optional_attrs):
        """Prepare to store data from user-desired optional fields.

          Not loading these optional fields by default saves in space and speed.
          But allow the possibility for saving these fields, if the user desires,
            Including:
              comment consider def is_class_level is_metadata_tag is_transitive
              relationship replaced_by subset synonym transitive_over xref

        Args:
            optional_attrs: a single attribute name, an iterable of names,
                or None.

        Returns:
            A set of the recognized names, with 'defn' normalized to 'def',
            or None when nothing recognized was requested.
        """
        # Required attributes are always loaded. All others are optionally loaded.
        if optional_attrs is None:
            return None
        # A lone string is one requested name, not an iterable of characters.
        if isinstance(optional_attrs, str):
            optional_attrs = [optional_attrs]
        known = OboOptionalAttrs.attributes
        # Allow user to specify either: 'def' or 'defn'
        #   'def' is an obo field name, but 'defn' is legal Python attribute name
        opts = set('def' if name == 'defn' else name
                   for name in optional_attrs if name in known)
        return opts if opts else None
123
124
125
# Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved.
126