Passed
Push — master ( 41c0d7...aa2adb )
by Marcin
07:40
created

RNAmodel.__get_atoms()   B

Complexity

Conditions 6

Size

Total Lines 17
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 15
nop 1
dl 0
loc 17
rs 8.6666
c 0
b 0
f 0
1
#!/usr/bin/env python
2
from __future__ import print_function
3
__docformat__ = 'reStructuredText'
4
import os
5
import Bio.PDB.PDBParser
6
import Bio.PDB.Superimposer
7
from Bio.PDB.PDBIO import Select
8
from Bio.PDB import PDBIO
9
from Bio.SVDSuperimposer import SVDSuperimposer
10
from numpy import sqrt, array, asarray
11
12
13
BACKBONE_ATOMS = [
14
    'P', 'OP1', 'OP2', "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"
15
]
16
# Atoms used to anchor base orientation per residue type
17
PURINE_ATOMS = ['N9', 'C8', 'C7']  # G, A
18
PYRIMIDINE_ATOMS = ['N1', 'C6', 'C2']  # C, U (and T)
19
PURINES = {'A', 'G', 'DA', 'DG'}
20
PYRIMIDINES = {'C', 'U', 'T', 'DT'}
21
22
ATOM_SELECTION_SUMMARY = "\n".join([
23
    f"BACKBONE_ATOMS = {BACKBONE_ATOMS}",
24
    "# Atoms used to anchor base orientation per residue type",
25
    f"PURINE_ATOMS = {PURINE_ATOMS}  # G, A",
26
    f"PYRIMIDINE_ATOMS = {PYRIMIDINE_ATOMS}  # C, U (and T)",
27
    f"PURINES = {sorted(PURINES)}",
28
    f"PYRIMIDINES = {sorted(PYRIMIDINES)}",
29
])
30
31
32
def get_atom_selection_summary():
33
    return ATOM_SELECTION_SUMMARY
34
35
class RNAmodel:
36
    """RNAmodel
37
38
    :Example:
39
40
        >>> rna = RNAmodel("test_data/rp14/rp14_5ddp_bound_clean_ligand.pdb", [1], False, None)
41
        >>> rna.get_report()
42
        "File:  rp14_5ddp_bound_clean_ligand.pdb  # of atoms: 1 \\nresi:  1  atom:  <Atom C3'> \\n"
43
44
    :param fpath: file path, string
45
    :param residues: list of residues to use (and since we take only 1 atom, C3', this equals to number of atoms.
46
    :param save: boolean, save to output_dir or not
47
    :param output_dir: string, if save, save segments to this folder
48
    """
49
    #:returns: None
50
    #:rtype: None
51
    #"""
52
    def __init__(self, fpath, residues, save=False, output_dir=""):
53
54
        # parser 1-5 -> 1 2 3 4 5
55
        self.struc = Bio.PDB.PDBParser().get_structure('', fpath)
56
        self.fpath = fpath
57
        self.fn = os.path.basename(fpath)
58
        self.residues = residues #self.__parser_residues(residues)
59
        self.__get_atoms()
60
        #self.atoms = []
61
        if save:
62
            self.save(output_dir) # @save
63
64
    def __parser_residues(self, residues):
65
        """Get string and parse it
66
        '1 4 5 10-15' -> [1, 4, 5, 10, 11, 12, 13, 14, 15]"""
67
        rs = []
68
        for r in residues.split():
69
            l = parse_num_list(r)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable parse_num_list does not seem to be defined.
Loading history...
70
            for i in l:
71
                if i in rs:
72
                    raise Exception('You have this resi already in your list! See', residues)
73
            rs.extend(l)
74
        return rs
75
76
    def __get_atoms(self):
77
        self.atoms = []
78
        self.atom_ids = []  # (resSeq, atomName)
79
        self.atom_lookup = {}
80
        for res in self.struc.get_residues():
81
            res_seq = res.id[1]
82
            if res_seq not in self.residues:
83
                continue
84
            # backbone atoms
85
            for atom_name in BACKBONE_ATOMS:
86
                self._append_atom_if_present(res, res_seq, atom_name)
87
            # base atoms
88
            for atom_name in self._get_base_atoms(res):
89
                self._append_atom_if_present(res, res_seq, atom_name)
90
        if len(self.atom_ids) <= 0:
91
            raise Exception('problem: no atoms were selected!: %s' % self.fn)
92
        return self.atoms
93
94
    def _append_atom_if_present(self, residue, res_seq, atom_name):
95
        if residue.has_id(atom_name):
96
            atom = residue[atom_name]
97
            key = (res_seq, atom_name)
98
            self.atom_lookup[key] = atom
99
            self.atom_ids.append(key)
100
            self.atoms.append(atom)
101
        else:
102
            pass  # silently ignore missing atoms
103
104
    def _get_base_atoms(self, residue):
105
        resname = residue.get_resname().strip().upper()
106
        if resname in PURINES or (resname and resname[0] in ('A', 'G')):
107
            return PURINE_ATOMS
108
        if resname in PYRIMIDINES or (resname and resname[0] in ('C', 'U', 'T')):
109
            return PYRIMIDINE_ATOMS
110
        return []
111
112
    def __str__(self):
113
        return self.fn #+ ' # beads' + str(len(self.residues))
114
115
    def __repr__(self):
116
        return self.fn #+ ' # beads' + str(len(self.residues))
117
118
    def get_report(self):
119
        """Str a short report about rna model"""
120
        t = ' '.join(['File: ', self.fn, ' # of atoms:', str(len(self.atoms)), '\n'])
121
        for r, a in zip(self.residues, self.atoms):
122
            t += ' '.join(['resi: ', str(r), ' atom: ', str(a), '\n'])
123
        return t
124
125
    def get_rmsd_to(self, other_rnamodel, output='', dont_move=False):
126
        """Calc rmsd P-atom based rmsd to other rna model"""
127
        sup = Bio.PDB.Superimposer()
128
129
        paired_self, paired_other = self._get_matched_atom_lists(other_rnamodel)
130
        if dont_move:
131
            coords = array([a.get_vector().get_array() for a in paired_self])
132
            other_coords = array([a.get_vector().get_array() for a in paired_other])
133
            s = SVDSuperimposer()
134
            s.set(coords, other_coords)
135
            return s.get_init_rms()
136
137
        sup.set_atoms(paired_self, paired_other)
138
139
        rms = round(sup.rms, 3)
140
        
141
        if output:
142
            io = Bio.PDB.PDBIO()
143
            sup.apply(self.struc.get_atoms())
144
            io.set_structure( self.struc )
145
            io.save("aligned.pdb")
146
147
            io = Bio.PDB.PDBIO()
148
            sup.apply(other_rnamodel.struc.get_atoms())
149
            io.set_structure( other_rnamodel.struc )
150
            io.save("aligned2.pdb")
151
        return rms
152
153
    def _get_matched_atom_lists(self, other_rnamodel):
154
        other_lookup = other_rnamodel.atom_lookup
155
        common_keys = [key for key in self.atom_ids if key in other_lookup]
156
        if not common_keys:
157
            raise Exception('No common atoms found between %s and %s' % (self.fn, other_rnamodel.fn))
158
        self_atoms = [self.atom_lookup[key] for key in common_keys]
159
        other_atoms = [other_lookup[key] for key in common_keys]
160
        return self_atoms, other_atoms
161
162
    def save(self, output_dir, verbose=True):
163
        """Save structures and motifs """
164
        folder_to_save =  output_dir + os.sep # ugly hack 'rp14/'
165
        try:
166
            os.makedirs(folder_to_save)
167
        except OSError:
168
            pass
169
170
        try:
171
            os.mkdir(folder_to_save + 'structures')
172
        except OSError:
173
            pass
174
175
        try:
176
            os.mkdir(folder_to_save + 'motifs')
177
        except OSError:
178
            pass
179
180
        RESI = self.residues
181
        if not self.struc:
182
            raise Exception('self.struct was not defined! Can not save a pdb!')
183
184
        class BpSelect(Select):
185
            def accept_residue(self, residue):
186
                if residue.get_id()[1] in RESI:
187
                    return 1
188
                else:
189
                    return 0
190
191
        io = PDBIO()
192
        io.set_structure(self.struc)
193
        fn = folder_to_save + 'structures' + os.sep + self.fn #+ '.pdb'
194
        io.save(fn)
195
        if verbose:
196
            print('    saved to struc: %s ' % fn)
197
198
        io = PDBIO()
199
        io.set_structure(self.struc)
200
        fn = folder_to_save +  'motifs/' + os.sep + self.fn #+ self.fn.replace('.pdb', '_motif.pdb')# #+ '.pdb'
201
        io.save(fn, BpSelect())
202
        if verbose:
203
            print('    saved to motifs: %s ' % fn)
204
205
#main
206
if __name__ == '__main__':
207
    import doctest
208
    doctest.testmod()
209
210
    #rna = RNAmodel("test_data/rp14/rp14_5ddp_bound_clean_ligand.pdb", [1], False, Non3e)
211
    #print(rna.get_report())
212
    a = RNAmodel("test_data/GGC.pdb", [46,47,48])
213
    b = RNAmodel("test_data/GUC.pdb", [31, 32, 33])
214
215
    print(a.get_rmsd_to(b))
216
    print(a.get_rmsd_to(b, dont_move=True))
217
    
218