| 1 |  |  | """Manage optional GO-DAG attributes.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | __copyright__ = "Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved." | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | __author__ = "DV Klopfenstein" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | import re | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | import collections as cx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | class OboOptionalAttrs(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |     """Manage optional GO-DAG attributes.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |     attributes = set(['def', 'defn', 'synonym', 'relationship', 'xref', 'subset', 'comment'])  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |     def __init__(self, optional_attrs): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |         assert optional_attrs | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |         self.optional_attrs = optional_attrs | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |         self.attr2cmp = self._init_compile_patterns(optional_attrs) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |     def update_rec(self, rec, line): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |         """Update current GOTerm with optional record.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |         if 'def' in self.optional_attrs and line[:5] == "def: ": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |             assert not hasattr(rec, 'defn'), "ATTR(defn) ALREADY SET({VAL})".format(VAL=rec.defn) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |             # Use 'defn' because 'def' is a reserved word in python | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |             rec.defn = line[5:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |         elif 'synonym' in self.optional_attrs and line[:9] == "synonym: ": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |             rec.synonym.append(self._get_synonym(line[9:])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |         # http://geneontology.org/page/ontology-relations | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |         elif 'relationship' in self.optional_attrs and line[:14] == "relationship: ": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |             # relationships are stored in a dict of sets, mirroring | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |             # the structure implied in the GO DAG. Example: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |             # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |             #  relationship = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |             #     'part_of': set(['GO:0021513', 'GO:0006310']), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |             #     'regulates': set(['GO:0006313']), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |             #     'negatively_regulates': set(['GO:0021910']), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |             #     'positively_regulates': set(['GO:0006313']), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |             # } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |             rel, goid = line[14:].split()[:2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |             if rel not in rec.relationship: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |                 rec.relationship[rel] = set([goid]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |                 rec.relationship[rel].add(goid) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         elif 'xref' in self.optional_attrs and line[:6] == "xref: ": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |             rec.xref.add(self._get_xref(line[6:])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         elif 'subset' in self.optional_attrs and line[:8] == "subset: ": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |             rec.subset.add(line[8:]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         elif 'comment' in self.optional_attrs and line[:9] == "comment: ": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |             rec.comment = line[9:] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     def init_datamembers(self, rec): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         """Initialize current GOTerm with data members for storing optional attributes.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         # pylint: disable=multiple-statements | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |         if 'synonym'      in self.optional_attrs: rec.synonym = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         if 'xref'         in self.optional_attrs: rec.xref = set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         if 'subset'       in self.optional_attrs: rec.subset = set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |         if 'comment'      in self.optional_attrs: rec.comment = "" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         if 'relationship' in self.optional_attrs: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |             rec.relationship = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |             rec.relationship_rev = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |     def _get_synonym(self, line): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |         """Given line, return optional attribute synonym value in a namedtuple.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |         # Example synonyms: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |         # "regulation of postsynaptic cytosolic calcium levels" EXACT syngo_official_label [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |         # "tocopherol 13-hydroxylase activity" EXACT systematic_synonym [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |         mtch = self.attr2cmp['synonym'].match(line) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |         text, scope, typename, dbxrefs, _ = mtch.groups() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |         typename = typename.strip() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         dbxrefs = set(dbxrefs.split(', ')) if dbxrefs else set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         return self.attr2cmp['synonym nt']._make([text, scope, typename, dbxrefs]) | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 74 |  |  |     def _get_xref(self, line): | 
            
                                                                        
                            
            
                                    
            
            
                | 75 |  |  |         """Given line, return optional attribute xref value in a dict of sets.""" | 
            
                                                                        
                            
            
                                    
            
            
                | 76 |  |  |         # Ex: Wikipedia:Zygotene | 
            
                                                                        
                            
            
                                    
            
            
                | 77 |  |  |         # Ex: Reactome:REACT_22295 "Addition of a third mannose to ..." | 
            
                                                                        
                            
            
                                    
            
            
                | 78 |  |  |         mtch = self.attr2cmp['xref'].match(line) | 
            
                                                                        
                            
            
                                    
            
            
                | 79 |  |  |         return mtch.group(1).replace(' ', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |     def _init_compile_patterns(optional_attrs): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |         """Compile search patterns for optional attributes if needed.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         attr2cmp = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         if optional_attrs is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |             return attr2cmp | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         # "peptidase inhibitor complex" EXACT [GOC:bf, GOC:pr] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         # "blood vessel formation from pre-existing blood vessels" EXACT systematic_synonym [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         # "mitochondrial inheritance" EXACT [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |         # "tricarboxylate transport protein" RELATED [] {comment="WIkipedia:Mitochondrial_carrier"} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         if 'synonym' in optional_attrs: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |             attr2cmp['synonym'] = re.compile(r'"(\S.*\S)" ([A-Z]+) (.*)\[(.*)\](.*)$') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |             attr2cmp['synonym nt'] = cx.namedtuple("synonym", "text scope typename dbxrefs") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         # Wikipedia:Zygotene | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         # Reactome:REACT_27267 "DHAP from Ery4P and PEP, Mycobacterium tuberculosis" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         if 'xref' in optional_attrs: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |             attr2cmp['xref'] = re.compile(r'^(\S+:\s*\S+)\b(.*)$') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         return attr2cmp | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |     def get_optional_attrs(optional_attrs): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |         """Prepare to store data from user-desired optional fields. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |           Not loading these optional fields by default saves in space and speed. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |           But allow the possibility for saving these fields, if the user desires, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |             Including: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |               comment consider def is_class_level is_metadata_tag is_transitive | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |               relationship replaced_by subset synonym transitive_over xref | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |         attrs_opt = set(['def', 'defn', 'synonym', 'relationship', 'xref', 'subset', 'comment']) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |         # Required attributes are always loaded. All others are optionally loaded. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |         # Allow user to specify either: 'def' or 'defn' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |         #   'def' is an obo field name, but 'defn' is legal Python attribute name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |         getnm = lambda aopt: aopt if aopt != "defn" else "def" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |         # pylint: disable=redefined-variable-type | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |         opts = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |         if isinstance(optional_attrs, str) and optional_attrs in attrs_opt: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |             opts = set([getnm(optional_attrs)]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |             opts = set([getnm(f) for f in optional_attrs if f in attrs_opt]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |         if opts: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |             return opts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 125 |  |  | # Copyright (C) 2015-2018, DV Klopfenstein, H Tang, All rights reserved. | 
            
                                                        
            
                                    
            
            
                | 126 |  |  |  |