RNAStructure.un_nmr()   F
last analyzed

Complexity

Conditions 15

Size

Total Lines 56
Code Lines 37

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 15
eloc 37
nop 4
dl 0
loc 56
rs 2.9998
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Complexity

Complex classes like build.rna_tools.rna_tools_lib.RNAStructure.un_nmr() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""rna_tools_lib.py - main lib file, many tools in this lib is using this file."""
4
5
import os
6
import sys
7
from collections import OrderedDict
8
import re
9
import string
10
import time
11
import gzip
12
import tempfile
13
import shutil
14
import subprocess
15
16
from rna_tools.tools.extra_functions.select_fragment import select_pdb_fragment_pymol_style, select_pdb_fragment
17
18
from icecream import ic
19
import sys
20
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr), includeContext=True)
21
ic.configureOutput(prefix='')
22
23
import logging
24
logger = logging.getLogger('rna-tools')
25
handler = logging.StreamHandler()
26
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
27
handler.setFormatter(formatter)
28
logger.addHandler(handler)
29
30
# Don't fix OP3, ignore it
31
ignore_op3 = False
32
33
# Settings: what is what in a PDB file
34
AMINOACID_CODES = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY",
35
                   "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR",
36
                   "TRP", "TYR", "VAL"]
37
38
def aa3to1(aaa):
39
    """based on https://pymolwiki.org/index.php/Aa_codes"""
40
    if len(aaa) != 3:  # aaa is 'G', like for RNA ;-)
41
        return '' # dont do it for rna test aaa
42
    one_letter ={'VAL':'V', 'ILE':'I', 'LEU':'L', 'GLU':'E', 'GLN':'Q', \
43
                 'ASP':'D', 'ASN':'N', 'HIS':'H', 'TRP':'W', 'PHE':'F', 'TYR':'Y',    \
44
                 'ARG':'R', 'LYS':'K', 'SER':'S', 'THR':'T', 'MET':'M', 'ALA':'A',    \
45
                 'GLY':'G', 'PRO':'P', 'CYS':'C'}
46
    return one_letter[aaa]
47
48
49
RES = ['DA', 'DG', 'DT', 'DC']
50
RES += ['A', 'G', 'U', 'C']
51
52
RESS = ['A', 'C', 'G', 'U', 'ADE', 'CYT', 'GUA', 'URY', 'URI', 'U34', 'U31', 'C31', '4SU', 'H2U', 'QUO', 'G7M', '5MU',
53
        '5MC', 'PSU', '2MG', '1MG', '1MA', 'M2G', '5BU', 'FHU', 'FMU', 'IU', 'OMG', 'OMC', 'OMU', 'A2M', 'A23', 'CCC',
54
        'I'] + ['RC', 'RU', 'RA', 'RG', 'RT']
55
DNA = ['DA', 'DG', 'DT', 'DC']
56
RNA = ['A', 'G', 'U', 'C']
57
IONS = ['NA', 'MG', 'MN', 'JOS'] # and ligands JOS
58
HYDROGEN_NAMES = ["H", "H5'", "H5''", "H4'", "H3'", "H2'", "HO2'", "H1'", "H3", "H5", "H6", "H5T", "H41", "1H5'",
59
                  "2H5'", "HO2'", "1H4", "2H4", "1H2", "2H2", "H1", "H8", "H2", "1H6", "2H6", "HO5'", "H21", "H22",
60
                  "H61", "H62", "H42", "HO3'", "1H2'", "2HO'", "HO'2", "H2'1", "HO'2", "HO'2", "H2", "H2'1", "H1", "H2",
61
                  "1H5*", "2H5*", "H4*", "H3*", "H1*", "1H2*", "2HO*", "1H2", "2H2", "1H4", "2H4", "1H6", "2H6", "H1",
62
                  "H2", "H3", "H5", "H6", "H8", "H5'1", "H5'2", "H3T"]
63
64
65
class PDBFetchError(Exception):
66
    pass
67
68
class MethodUnknown(Exception):
69
    pass
70
71
72
try:
73
    from Bio.PDB import *
74
except ImportError:
75
    print("Biopython is not detected. It is required for some functions.")
76
77
import warnings
78
warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
79
warnings.filterwarnings('ignore', '.*with given element *',)
80
warnings.filterwarnings('ignore', '.*is discontinuous*',)
81
warnings.filterwarnings('ignore', '.*Some atoms or residues may be missing in the data structure.*',)
82
warnings.filterwarnings('ignore', '.*Ignoring unrecognized record.*',)
83
84
85
import subprocess
86
def exe(cmd):
87
    o = subprocess.Popen(
88
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
89
    out = o.stdout.read().strip().decode()
90
    err = o.stderr.read().strip().decode()
91
    return out, err
92
93
94
def get_rna_tools_path():
95
    """Return path to the rt."""
96
    import inspect
97
    import rna_tools
98
    return os.path.dirname(inspect.getfile(rna_tools))
99
100
def get_version(currfn='', verbose=False):
101
    import rna_tools
102
    return rna_tools.__version__
103
104
105
def sort_strings(l):
106
    """ Sort the given list in the way that humans expect.
107
    http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
108
    """
109
    def convert(text): return int(text) if text.isdigit() else text
110
111
    def alphanum_key(key): return [convert(c) for c in re.split('([0-9]+)', key)]
112
    l.sort(key=alphanum_key)
113
    return l
114
115
116
class RNAStructure:
117
    """RNAStructure - handles an RNA pdb file.
118
119
    Attributes:
120
121
        fn (string)  : path to the structural file, e.g., "../rna_tools/input/4ts2.pdb"
122
        name(string) : filename of the structural file, "4ts2.pdb"
123
        lines (list) : the PDB file is loaded and ATOM/HETATM/TER/END go to self.lines
124
125
    """
126
127
    def __init__(self, fn=''):
128
        if not fn: # if empty
129
            tf = tempfile.NamedTemporaryFile(delete=False)
130
            fn = tf.name
131
132
        self.fn = os.path.abspath(fn) # ../rna_tools/input/4ts2.pdb
133
        # add name attribute with filename 4ts2.pdb
134
        self.name = self.fn.split(os.sep)[-1] # OS.path.splitext(self.fn)[0]
135
136
        self.report = []
137
        self.report.append('The RNARNAStructure report: %s ' % self.fn)
138
139
        self.mol2_format = False
140
141
        self.lines = []
142
143
        try:
144
            # lines = open(fn).read().strip().split('\n') # don't strip?, good or bed?
145
            lines = open(fn).read().split('\n')
146
        except UnicodeDecodeError:
147
            print("Can't open a binary file")
148
            self.lines = ''
149
            return
150
151
        self.has_many_models = False
152
153
        for l in lines:
154
            # multi-models pdb files
155
            if l.startswith('MODEL'):
156
                self.has_many_models = True
157
            if l.startswith('ENDMDL'):
158
                break
159
160
            if l.startswith('ATOM') or l.startswith('HETATM') or l.startswith('TER') or l.startswith('END'):
161
                self.lines.append(l) # don't strip .strip())
162
            if l.startswith("@<TRIPOS>"):
163
                self.mol2_format = True
164
                self.report.append('This is mol2 format')
165
166
        self.res = self.get_resn_uniq()
167
168
    def reload(self):
169
        """Reload the object."""
170
        self.__init__(self.fn)
171
        
172
    def is_pdb(self):
173
        """Return True if the files is in PDB format.
174
175
        If self.lines is empty it means that nothing was parsed into the PDB format."""
176
        if len(self.lines):
177
            return True
178
        else:
179
            return False
180
181
    def is_nmr(self):
182
        """True if the file is an NMR-style multiple model pdb
183
184
        :returns: True or Fo
185
        :rtype: boolean
186
        """
187
        return self.has_many_models
188
189
    def un_nmr(self, startwith1=True, directory='', verbose=False):
190
        """Un NMR - Split NMR-style multiple model pdb files into individual models.
191
192
        Take self.fn and create new files in the way::
193
194
            input/1a9l_NMR_1_2_models.pdb
195
               input/1a9l_NMR_1_2_models_0.pdb
196
               input/1a9l_NMR_1_2_models_1.pdb
197
198
        .. warning:: This function requires biopython.
199
200
        rna_pdb_tools.py --un-nmr AF-Q5TCX8-F1-model_v1_core_Ctrim_mdr_MD.pdb
201
36
202
        cat *MD_* > md.pdb
203
        """
204
        #
205
        npdb = ''
206
        c = 0
207
208
        nmodels = 0
209
        for l in open(self.fn):
210
            if l.startswith('MODEL'):
211
                nmodels += 1
212
        print(nmodels)
213
214
        for l in open(self.fn):
215
            if 1:
216
                if not l.startswith('HETATM'):
217
                    npdb += l
218
            if l.startswith('MODEL'):
219
                c += 1
220
            if l.startswith('ENDMDL'):
221
                id = str(c).zfill(len(str(nmodels)))
222
                nf = self.fn.replace('.pdb', '_%s.pdb' % id)
223
                verbose = 1
224
                if verbose:
225
                    print(nf)
226
                with open(nf, 'w') as f:
227
                    f.write(npdb)
228
                npdb = ''
229
        biopython = 0
230
        
231
        if biopython:
232
            parser = PDBParser()
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable PDBParser does not seem to be defined.
Loading history...
233
            structure = parser.get_structure('', self.fn)
234
            for c, m in enumerate(structure):
235
                if verbose:
236
                    print(m)
237
                io = PDBIO()
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable PDBIO does not seem to be defined.
Loading history...
238
                io.set_structure(m)
239
                if startwith1:
240
                    c += 1
241
                if directory:
242
                    io.save(directory + os.sep + self.fn.replace('.pdb', '_%i.pdb' % c))
243
                else:
244
                    io.save(self.fn.replace('.pdb', '_%i.pdb' % c))                
245
246
    def is_mol2(self):
247
        """Return True if is_mol2 based on the presence of ```@<TRIPOS>```."""
248
        return self.mol2_format
249
250
    def decap_gtp(self):
251
        lines = []
252
        for l in self.lines:
253
            if l.startswith('ATOM') or l.startswith('HETATM'):
254
                if l[12:16].strip() in ['PG', 'O1G', 'O2G', 'O3G', 'O3B', 'PB', 'O1B', 'O2B', 'O3A']:
255
                    continue
256
                if l[12:16].strip() == 'PA':
257
                    l = l.replace('PA', 'P ')
258
                if l[12:16].strip() == 'O1A':
259
                    l = l.replace('O1A', 'O1P')
260
                if l[12:16].strip() == 'O2A':
261
                    l = l.replace('O2A', 'O2P')
262
                if l[17:20].strip() == 'GTP':
263
                    l = l[:17] + '  G' + l[20:]
264
                    l = l.replace('HETATM', 'ATOM  ')
265
                lines.append(l)
266
        self.lines = lines
267
268
    def is_amber_like(self):
269
        """Use self.lines and check if there is XX line
270
        """
271
        for l in self.lines:
272
            if l.startswith('ATOM') or l.startswith('HETATM'):
273
                rn = l[17:20]
274
                if rn in ['RU5', 'RC5', 'RA5', 'RT5', 'RG5']:
275
                    self.report.append('This is amber-like format')
276
                    return True
277
        return False
278
279
    def replace_hetatms(self):
280
        nlines = []
281
        for l in self.lines:
282
            l = l.replace('HETATM', 'ATOM  ')
283
            nlines.append(l)
284
        self.lines = nlines
285
286
    def fix_with_qrnas(self, outfn="", verbose=False):
287
        """Add missing heavy atom.
288
289
        A residue is recognized base on a residue names.
290
291
        Copy QRNAS folder to curr folder, run QRNAS and remove QRNAS.
292
293
        .. warning:: QRNAS required (http://genesilico.pl/QRNAS/QRNAS.tgz)
294
        """
295
        from rpt_config import QRNAS_PATH
296
297
        # prepare folder get ff folder
298
        to_go = os.path.abspath(os.path.dirname(self.fn))
299
        curr = os.getcwd()
300
301
        # set occupancy to 0
302
        s = RNAStructure(self.fn)
303
        s.set_occupancy_atoms(0.00)
304
        s.write(self.fn)
305
306
        os.chdir(to_go)
307
        try:
308
            shutil.copytree(QRNAS_PATH, to_go + os.sep + "QRNAS")
309
        except OSError:
310
            pass
311
        # prepare config file
312
        with open('qrna_config.txt', 'w') as f:
313
            f.write("WRITEFREQ   1\n")
314
            f.write("NSTEPS      1\n")
315
        # run qrnas
316
        print('QRNAS...')
317
318
        if not outfn:
319
            cmd = "QRNAS -c qrna_config.txt -i " + os.path.basename(self.fn)
320
        else:
321
            cmd = "QRNAS -c qrna_config.txt -i " + \
322
                os.path.basename(self.fn) + " -o " + curr + os.sep + outfn
323
        #o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
324
        os.system(cmd)
325
        if False:
326
            out = o.stdout.read().strip()
327
            err = o.stderr.read().strip()
328
            if verbose:
329
                print(out)
330
                print(err)
331
                shutil.rmtree(to_go + os.sep + "QRNAS")
332
        # post cleaning
333
        if outfn:
334
            print('Cleaning...')
335
            s = RNAStructure(curr + os.sep + outfn)
336
            s.remove_hydrogen()
337
            s.std_resn()
338
            s.write(curr + os.sep + outfn)
339
        os.chdir(curr)
340
341
    def mol2toPDB(self, outfn=""):
342
        try:
343
            import pybel
344
        except ImportError:
345
            print ('pybel is needed for mol2 to pdb convertion')
346
            # sys.exit(1)
347
            sys.exit(0)
348
349
        if not outfn:
350
            outfn = self.fn.replace('.mol2', '.pdb')
351
352
        for mol in pybel.readfile("mol2", self.fn):
353
            mol.write("pdb", outfn, overwrite=True)
354
355
        print('outfn: ', outfn)
356
        self.report.append('  Converted from mol2 to PDB')
357
        return outfn
358
359
    def get_no_lines(self):
360
        return len(self.lines)
361
        
362
    def check_geometry(self, verbose=False):
363
        """Check for correct "Polymer linkage, it should be around 1.6Å with a sigma around 0.01.
364
        
365
        Carrascoza, F., Antczak, M., Miao, Z., Westhof, E. & Szachniuk, M. Evaluation of the stereochemical quality of predicted RNA 3D models in the RNA-Puzzles submissions. Rna 28, 250–262 (2022).
366
367
        Values for 1xjr.pdb::
368
369
            op.mean(): 1.599316
370
            op.std(): 0.009274074
371
372
            po.mean(): 1.5984017
373
            po.std(): 0.0069191623
374
        
375
        requires biopython
376
        """
377
        if verbose:
378
            logger.setLevel(logging.DEBUG)
379
380
        try:
381
            from Bio import PDB
382
            from Bio.PDB import PDBIO
383
            import warnings
384
            warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
385
            warnings.filterwarnings('ignore', '.*with given element *',)
386
        except:
387
            sys.exit('Error: Install biopython to use this function (pip biopython)')
388
389
        pdb = self.fn
390
        struc = PDB.PDBParser().get_structure('struc', pdb)
391
        resi_prev = {}
392
        op = []
393
        po = []
394
        angles = []
395
        angles2 = []
396
397
        p_ps = []
398
        sigma = 0.009274074
399
        mean =  1.599316
400
        import numpy as np
401
402
        for s in struc:
403
            for c in s:
404
                # for new chain reset resi_prev
405
                resi_prev = None
406
                for r in c:
407
                    if resi_prev:
408
                        p_p = resi_prev["P"] - r['P']
409
                        p_ps.append(p_p)
410
                        
411
                        v = resi_prev["O3'"] - r['P']
412
                        op.append(v)
413
                        x = mean - 6 * sigma
414
                        y = mean + 6 * sigma
415
416
                        v2 = r["P"] - r["O5'"]
417
                        #ic(v2)
418
                        po.append(v2)
419
420
                        if not (x <= v <= y):
421
                            print(f' ! lack of connectivity between {r}, {resi_prev}, distance between residues: {v:.2f}')
422
423
                        # angle
424
                        pc3p = resi_prev["O3'"].get_vector()
425
                        p = r['P'].get_vector()
426
                        o5p = r["O5'"].get_vector()
427
                        bo5p = p - o5p # b as bond
428
                        bpc3p = p - pc3p
429
                        if 0:
430
                            ic(bo5p.angle(bpc3p))
431
                            ic(bpc3p.angle(bo5p))
432
                        angle = np.rad2deg(bpc3p.angle(bo5p))
433
                        angles.append(angle)
434
435
436
                        # angle
437
                        po3p = resi_prev["O3'"].get_vector()
438
                        p = r['P'].get_vector()
439
                        o5p = r["O5'"].get_vector()
440
                        bo5p = p - o5p # b as bond
441
                        bpc3p = p - po3p
442
                        if 0:
443
                            ic(bo5p.angle(bpc3p))
444
                            ic(bpc3p.angle(bo5p))
445
                        angle = np.rad2deg(bpc3p.angle(bo5p))
446
                        angles.append(angle)
447
448
                        pc3p = resi_prev["C3'"].get_vector()
449
                        v1 = po3p - pc3p
450
                        v2 = po3p - p
451
                        angle2 = np.rad2deg(v2.angle(v1))
452
                        angles2.append(angle2)
453
454
                    resi_prev = r
455
456
        p_ps = np.array(p_ps)
457
        #np.set_printoptions(threshold=np.inf)
458
        ic.disable()
459
        ic(p_ps)
460
        ic(p_ps.mean(), p_ps.std())
461
        if False:
462
            import matplotlib.pyplot as plt
463
            plt.hist(p_ps, bins='auto')
464
            plt.title("p-ps")
465
            plt.show()
466
467
        op = np.array(op)
468
        ic(op.mean(), op.std())
469
470
        po = np.array(po)
471
        ic(po.mean(), po.std())
472
473
        angles = np.array(angles)
474
        ic(angles)
475
        ic(angles.mean(), angles.std())
476
        if False:
477
            import matplotlib.pyplot as plt
478
            plt.hist(angles, bins='auto')
479
            plt.title("Histogram of angles o3'-p-o5'")
480
            plt.xlim(0, 360)
481
            plt.show()
482
        
483
        angles2 = np.array(angles2)
484
        ic(angles2)
485
        ic(angles2.mean(), angles2.std())
486
487
        if False:
488
            import matplotlib.pyplot as plt
489
            plt.hist(angles2, bins='auto')
490
            plt.title("Histogram of angles c3'-o3'-p")
491
            plt.xlim(0, 360)
492
            plt.show()
493
        ic.enable()
494
        
495
    def get_text(self, add_end=True):
496
        """works on self.lines."""
497
        txt = ''
498
        for l in self.lines:
499
            if l.startswith('END'):
500
                continue  # skip end
501
            txt += l + '\n' # .strip()
502
        if add_end:
503
            if not l.startswith('END'):
0 ignored issues
show
introduced by
The variable l does not seem to be defined in case the for loop on line 498 is not entered. Are you sure this can never be the case?
Loading history...
504
                txt += 'END'
505
        return txt.strip()
506
507
    def get_chain(self, chain_id='A'):
508
        txt = ''
509
        for l in self.lines:
510
            if l.startswith('ATOM') or l.startswith('HETATM'):
511
                if l[21] == chain_id:
512
                    txt += l.strip() + '\n'
513
            if l.startswith('TER'):
514
                if len(l) < 21: # this is only TER
515
                    #                     ter = 'TER   ' +  str(self.get_atom_num(txt) + 1).rjust(5) + '\n'
516
                    raise Exception('TER line has no chain id')
517
                else:     
518
                    if l[21] == chain_id:
519
                          txt += l.strip() + '\n'
520
        ## ll = txt.strip().split('\n')[-1] # last line = ll
521
        ## print(ll)
522
        ## #if ll.startswith('TER'):  # if the last line does not start with ter
523
        ## ter = self.set_res_code(ter, self.get_res_code(ll)) # + self.get_chain_id(l).rjust(11)
524
        ## print(ter)
525
        return txt.strip()
526
527
    def rename_chain(self, chain_id_old, chain_id_new, debug=False):
528
        """Rename chains
529
530
        Args:
531
            chain_id_old (str): e.g., A
532
            chain_id_new (str): e.g., B
533
            debug (bool): show some diagnostics
534
535
        Returns:
536
            pdb content (txt)
537
            self.lines is updated with new lines
538
        """
539
        txt = ''
540
        lines = []
541
        for l in self.lines:
542
            if l.startswith('ATOM') or l.startswith('HETATM') or l.startswith('TER'):
543
                # TER
544
                try:
545
                    l[21]
546
                except IndexError:
547
                    continue
548
                if l[21] == chain_id_old:
549
                    l = l[:21] + chain_id_new + l[22:]
550
            if debug: print(l)
551
            txt += l.strip() + '\n'  # ok, actually keep all lines as it was
552
            lines.append(l)
553
        self.lines = lines
554
        return txt
555
556
557
    def get_resn_uniq(self):
558
        res = set()
559
        for l in self.lines:
560
            r = l[17:20].strip().upper()
561
            res.add(r)
562
        return res
563
564
    def check_res_if_std_na(self):
565
        wrong = []
566
567
        for r in self.res:
568
            if r not in RES:
569
                wrong.append(r)
570
        return wrong
571
572
    def get_seq(self, compact=False, chainfirst=False, fasta=False, addfn='', color=False):
573
        """Get seq (v2) gets segments of chains with correct numbering
574
575
        Run::
576
577
            python rna_pdb_seq.py input/1ykq_clx.pdb
578
            > 1ykq_clx A:101-111
579
            GGAGCUCGCCC
580
            > 1ykq_clx B:201-238
581
            GGGCGAGGCCGUGCCAGCUCUUCGGAGCAAUACUCGGC
582
583
            > 6_solution_0 A:1-19 26-113 117-172
584
            GGCGGCAGGUGCUCCCGACGUCGGGAGUUAAAAGGGAAG
585
586
        Chains is ``{'A': {'header': 'A:1-19 26-113 117-172', 'resi': [1, 2, 3, ..., \
587
        19, 26, 27, ..., 172], 'seq': ['G', 'G', 'C', 'G', ... C', 'G', 'U', 'C']}}``
588
589
        Chains are in other as the appear in the file.
590
591
        .. warning :: take only ATOM and HETATM lines.
592
        """
593
        if addfn:
594
            addfn += ' ' # add space to file name
595
        seq = self.lines[0][19]
596
        chains = OrderedDict()
597
        resi_prev = None
598
        chain_prev = None
599
600
        for l in self.lines:
601
            if l.startswith('ATOM') or l.startswith('HETATM'):
602
                resi = int(l[22:26])
603
                if resi_prev != resi:
604
                    resname = l[17:20].strip()
605
                    chain_curr = l[21]
606
                    if resname in AMINOACID_CODES:
607
                        resname = aa3to1(resname)
608
                    if len(resname) == 'GTP':  # DG -> g GTP
609
                        resname = 'g'
610
                    if len(resname) > 1:  # DG -> g GTP
611
                        resname = resname[-1].lower()
612
                    try:
613
                        chains[chain_curr]['resi'].append(resi)
614
                        chains[chain_curr]['seq'].append(resname)
615
                    except KeyError:
616
                        chains[chain_curr] = {}
617
                        chains[chain_curr]['resi'] = []
618
                        chains[chain_curr]['resi'].append(resi)
619
                        chains[chain_curr]['seq'] = []
620
                        chains[chain_curr]['seq'].append(resname)
621
622
                    resi_prev = resi
623
                    chain_prev = chain_curr
624
                    
625
        def color_seq(seq, color):
626
            if not color:
627
                return seq
628
            else:
629
                from termcolor import colored
630
                seqc = ''
631
                for s in seq:
632
                    if s in ['G']:
633
                        seqc += colored(s, 'green')
634
                    if s in ['G']:
635
                        seqc += colored(s, 'red')
636
                    if s in ['T', 'U']:
637
                        seqc += colored(s, 'blue')                        
638
                    if s in ['C']:
639
                        seqc += colored(s, 'yellow')                        
640
                return seqc
641
            
642
643
        for c in list(chains.keys()):
644
            header = c + ':' + str(chains[c]['resi'][0]) + '-'  # add A:1-
645
            for i in range(1, len(chains[c]['resi'])):  # start from second element
646
                if chains[c]['resi'][i] - chains[c]['resi'][i - 1] > 1:
647
                    header += '' + str(chains[c]['resi'][i - 1]) + ' '
648
                    header += '' + str(chains[c]['resi'][i]) + '-'
649
            header += '' + str(chains[c]['resi'][-1])
650
            chains[c]['header'] = header  # add -163 (last element)
651
652
        if compact:
653
            txt = ''
654
            for c in list(chains.keys()):
655
                if chainfirst:
656
                    txt += '' + chains[c]['header'].ljust(15) + color_seq(''.join(chains[c]['seq']), color) + ' '
657
                elif fasta:
658
                    txt += color_seq(''.join(chains[c]['seq']), color) + ' '
659
                else:
660
                    txt += color_seq(''.join(chains[c]['seq']), color) + ' # ' + chains[c]['header'] + ' '
661
            return txt.strip()
662
        else:
663
            txt = ''
664
            for c in list(chains.keys()):
665
                txt += '>' + addfn + chains[c]['header'] + '\n'
666
                txt += color_seq(''.join(chains[c]['seq']), color) + '\n' # color ;-)
667
            return txt.strip()
668
669
    def __get_seq(self):
670
        """get_seq DEPRECATED
671
672
        You get `chains` such as:
673
        OrderedDict([('A', {'header': 'A:1-47', 'seq': 'CGUGGUUAGGGCCACGUUAAAUAGUUGCUUAAGCCCUAAGCGUUGAU'}), ('B', {'header': 'B:48-58', 'seq': 'AUCAGGUGCAA'})])
674
675
        .. warning:: take only ATOM and HETATM lines.
676
        """
677
        seq = ''
678
        curri = int(self.lines[0][22:26])
679
        seq = self.lines[0][19]
680
        chains = OrderedDict()
681
        curri = -100000000000000000000000000000000  # ugly
682
        chain_prev = None
683
684
        for l in self.lines:
685
            if l.startswith('ATOM') or l.startswith('HETATM'):
686
                resi = int(l[22:26])
687
                if curri != resi:
688
                    print(l)
689
                    resname = l[17:20].strip()
690
                    if len(resname) == 'GTP':  # DG -> g GTP
691
                        resname = 'g'
692
                    if len(resname) > 1:  # DG -> g GTP
693
                        resname = resname[-1].lower()
694
695
                    seq += resname
696
                    chain_curr = l[21]
697
698
                    # if distances between curr res and previevs is bigger than 1, then show it as a fragment
699
                    if resi - curri > 1 and resi - curri < 100000000000000000000000000000000:  # ugly hack
700
                        print(resi - curri)
701
                        chains[chain_prev]['header'] += '-' + str(resi_prev)
0 ignored issues
show
introduced by
The variable resi_prev does not seem to be defined for all execution paths.
Loading history...
702
                    if chain_prev != chain_curr and chain_prev:
703
                        chains[chain_prev]['header'] += '-' + str(resi_prev)
704
                    if chaoin_curr in chains:
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable chaoin_curr does not seem to be defined.
Loading history...
705
                        chains[chain_curr]['seq'] += resname
706
                    else:
707
                        chains[chain_curr] = dict()
708
                        chains[chain_curr]['header'] = chain_curr + ':' + str(resi)  # resi_prev)
709
                        chains[chain_curr]['seq'] = resname
710
                    resi_prev = resi
711
                    chain_prev = chain_curr
712
                curri = resi
713
        chains[chain_prev]['header'] += '-' + str(resi_prev)
714
        seq = ''
715
        for c in list(chains.keys()):
716
            seq += '> ' + os.path.basename(self.fn) + ' ' + chains[c]['header'] + '\n'
717
            seq += chains[c]['seq'] + '\n'
718
        return seq.strip()
719
720
    def get_info_chains(self):
721
        """return A:3-21 B:22-32
722
        """
723
        seq = ''
724
        curri = int(self.lines[0][22:26])
725
        seq = self.lines[0][19]
726
        chains = OrderedDict()
727
        curri = -100000000000000  # ugly
728
        chain_prev = None
729
        for l in self.lines:
730
            if l.startswith('ATOM') or l.startswith('HETATM'):
731
                resi = int(l[22:26])
732
                if curri != resi:
733
                    resname = l[17:20].strip()
734
                    if len(resname) == 'GTP':  # DG -> g GTP
735
                        resname = 'g'
736
                    if len(resname) > 1:  # DG -> g GTP
737
                        resname = resname[-1].lower()
738
                    seq += resname
739
                    chain_curr = l[21]
740
                    if chain_prev != chain_curr and chain_prev:
741
                        chains[chain_prev]['header'] += '-' + str(resi_prev)
0 ignored issues
show
introduced by
The variable resi_prev does not seem to be defined for all execution paths.
Loading history...
742
                    if chain_curr in chains:
743
                        chains[chain_curr]['seq'] += resname
744
                    else:
745
                        chains[chain_curr] = dict()
746
                        chains[chain_curr]['header'] = chain_curr + ':' + str(resi)  # resi_prev)
747
                        chains[chain_curr]['seq'] = resname
748
                    resi_prev = resi
749
                    chain_prev = chain_curr
750
                curri = resi
751
        chains[chain_prev]['header'] += '-' + str(resi_prev)
752
        seq = ''
753
        for c in list(chains.keys()):
754
            seq += chains[c]['header'] + ' '
755
        return seq.strip()
756
757
    def detect_file_format(self):
758
        pass
759
760
    def detect_molecule_type(self):
761
        aa = []
762
        na = []
763
        for r in self.res:
764
            if r in AMINOACID_CODES:
765
                aa.append(r)
766
            if r in RESS:
767
                na.append(r)
768
769
        aa = float(len(aa)) / len(self.res)
770
        na = float(len(na)) / len(self.res)
771
772
        if aa == 0 and na == 0:
773
            return 'error'
774
        if aa > na:
775
            return '>protein< vs na', aa, na
776
        else:
777
            return 'protein vs >na<', aa, na
778
779
    def get_head(self):
780
        return '\n'.join(self.lines[:5])
781
782
    def get_tail(self):
783
        return '\n'.join(self.lines[-5:])
784
785
    def get_preview(self):
786
        t = '\n'.join(self.lines[:5])
787
        t += '\n-------------------------------------------------------------------\n'
788
        t += '\n'.join(self.lines[-5:])
789
        return t
790
791
    def remove_hydrogen(self):
792
        lines = []
793
        for l in self.lines:
794
            if l[77:79].strip() == 'H':
795
                continue
796
            if l[12:16].strip() in HYDROGEN_NAMES:
797
                # if l[12:16].strip().startswith('H'):
798
                continue
799
            else:
800
                # print l[12:16]
801
                lines.append(l)
802
        self.lines = lines
803
804
    def remove_water(self):
805
        """Remove HOH and TIP3"""
806
        lines = []
807
        for l in self.lines:
808
            if l[17:21].strip() in ['HOH', 'TIP3', 'WAT']:
809
                continue
810
            else:
811
                lines.append(l)
812
        self.lines = lines
813
814
    def remove_ion(self):
815
        """
816
    TER    1025        U A  47
817
    HETATM 1026 MG    MG A 101      42.664  34.395  50.249  1.00 70.99          MG
818
    HETATM 1027 MG    MG A 201      47.865  33.919  48.090  1.00 67.09          MG
819
        :rtype: object
820
        """
821
        lines = []
822
        for l in self.lines:
823
            element = l[76:78].strip().upper()
824
            element2 = l[17:20].strip().upper()
825
            if element in IONS:
826
                continue
827
            if element2 in IONS:
828
                continue
829
            else:
830
                lines.append(l)
831
        self.lines = lines
832
833
    def fixU__to__U(self):
834
        lines = []
835
        for l in self.lines:
836
            if l.startswith('ATOM') or l.startswith('HETATM'):
837
                rn = l[17:20]
838
                rn = rn.replace('G  ', '  G')
839
                rn = rn.replace('U  ', '  U')
840
                rn = rn.replace('C  ', '  C')
841
                rn = rn.replace('A  ', '  A')
842
                l = l[:16] + ' ' + rn + ' ' + l[21:]
843
            # print l.strip()
844
            # print l2
845
            #l = l.replace(' U   ', '   U ')
846
            #l = l.replace(' G   ', '   G ')
847
            #l = l.replace(' A   ', '   A ')
848
            #l = l.replace(' C   ', '   C ')
849
            lines.append(l)
850
        print ('fixU__to__U OK')
851
        self.report.append('  Fix: U__ -> __U')
852
        self.lines = lines
853
854
    def resn_as_dna(self):
855
        lines = []
856
        for l in self.lines:
857
            if l.startswith('ATOM') or l.startswith('HETATM'):
858
                # print l
859
                nl = l.replace('DA5', ' DA')  # RA should be the last!!!!
860
                nl = nl.replace('DA3', ' DA')
861
                nl = nl.replace(' DA', ' DA')
862
                nl = nl.replace(' rA', ' DA')
863
864
                nl = nl.replace('DC5', ' DC')
865
                nl = nl.replace('DC3', ' DC')
866
                nl = nl.replace(' DC', ' DC')
867
                nl = nl.replace(' rC', ' DC')
868
869
                nl = nl.replace('DG5', ' DG')
870
                nl = nl.replace('DG3', ' DG')
871
                nl = nl.replace(' DG', ' DG')
872
                nl = nl.replace(' rG', ' DG')
873
874
                nl = nl.replace('DU5', ' DU')
875
                nl = nl.replace('DU3', ' DU')
876
                nl = nl.replace(' DU', ' DU')
877
                nl = nl.replace(' rU', ' DU')
878
879
                nl = nl.replace('DT5', ' DT')
880
                nl = nl.replace('DT3', ' DT')
881
                nl = nl.replace(' DT', ' DT')
882
                nl = nl.replace(' rT', ' DT')
883
884
                nl = nl.replace('C5M', 'C7 ')
885
886
                if l[17:20].strip() == 'G':
887
                    nl = nl[:17] + ' DG' + nl[20:]
888
889
                if l[17:20].strip() == 'C':
890
                    nl = nl[:17] + ' DC' + nl[20:]
891
892
                if l[17:20].strip() == 'T':
893
                    nl = nl[:17] + ' DT' + nl[20:]
894
895
                if l[17:20].strip() == 'U':
896
                    nl = nl[:17] + ' DU' + nl[20:]
897
898
                if l[17:20].strip() == 'A':
899
                    nl = nl[:17] + ' DA' + nl[20:]
900
901
                lines.append(nl)
902
            if l.startswith("END") or l.startswith("TER"):
903
                lines.append(l)
904
905
        print ('resn_as_dna')
906
        self.report.append('  resn_as_dna')
907
        self.lines = lines
908
909
    def fix_O_in_UC(self):
910
        """.. warning: remove RU names before using this function"""
911
        lines = []
912
        for l in self.lines:
913
            # if l[12:16].strip() in
914
            # if l[12:16].strip().startswith('H'):
915
            nl = l.replace('O     U',
916
                           'O2    U')
917
            nl = nl.replace('O     C',
918
                            'O2    C')
919
            lines.append(nl)
920
        self.lines = lines
921
922
    def fix_op_atoms(self):
923
        """Replace OXP' to OPX1, e.g ('O1P' -> 'OP1')"""
924
        lines = []
925
        for l in self.lines:
926
            nl = l.replace('*', '\'')
927
            nl = nl.replace('O1P', 'OP1')
928
            nl = nl.replace('O2P', 'OP2')
929
            nl = nl.replace('O3P', 'OP3')
930
            lines.append(nl)
931
        self.lines = lines
932
933
    def get_report(self):
934
        """
935
        Returns:
936
            string: report, messages collected on the way of parsing this file
937
        """
938
        return '\n'.join(self.report)
939
940
    def is_rna(self):
941
        wrong = []
942
        for r in self.res:
943
            if r.upper().strip() in ['RC', 'RU', 'RA', 'RG', 'RT']:
944
                if r not in wrong_res:
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable wrong_res does not seem to be defined.
Loading history...
945
                    wrong_res.append(r)
946
        return wrong_res
947
948
    def check_res_if_std_dna(self):
949
        wrong_res = []
950
        for r in self.res:
951
            if r.upper().strip() in ['A', 'T', 'C', 'G']:
952
                if r not in wrong_res:
953
                    wrong_res.append(r)
954
        return wrong_res
955
956
    def check_res_if_supid_rna(self):
957
        wrong_res = []
958
        for r in self.res:
959
            if r.upper().strip() in ['RC', 'RU', 'RA', 'RG', 'RT']:
960
                if r not in wrong_res:
961
                    wrong_res.append(r)
962
        return wrong_res
963
964
965
    def is_rna(self):
966
        for r in self.res:
967
            if r.upper().strip() in ['RC', 'RU', 'RA', 'RG', 'RT']:
968
                if r not in wrong_res:
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable wrong_res does not seem to be defined.
Loading history...
969
                    wrong_res.append(r)
970
        return wrong_res
971
972
    def renum_atoms(self):
973
        """Renum atoms, from 1 to X for line; ATOM/HETATM"""
974
        lines = []
975
        c = 1
976
        for l in self.lines:
977
            l = l[:6] + str(c).rjust(5) + l[11:]
978
            c += 1
979
            lines.append(l)
980
        self.lines = lines
981
982
    def std_resn(self):
983
        """'Fix' residue names which means to change them to standard, e.g. RA5 -> A
984
985
        Works on self.lines, and returns the result to self.lines.
986
987
        Will change things like::
988
989
            # URI -> U, URA -> U
990
            1xjr_clx_charmm.pdb:ATOM    101  P   URA A   5      58.180  39.153  30.336  1.00 70.94
991
            rp13_Dokholyan_1_URI_CYT_ADE_GUA_hydrogens.pdb:ATOM  82  P   URI A   4     501.633 506.561 506.256  1.00  0.00         P
992
        """
993
        lines = []
994
        for l in self.lines:
995
            nl = l.replace('RA5', '  A')  # RA should be the last!!!!
996
            nl = nl.replace('RA3', '  A')
997
            nl = nl.replace('ADE', '  A')
998
            nl = nl.replace(' RA', '  A')
999
            nl = nl.replace(' rA', '  A')
1000
1001
            nl = nl.replace('RC5', '  C')
1002
            nl = nl.replace('RC3', '  C')
1003
            nl = nl.replace('CYT', '  C')
1004
            nl = nl.replace(' RC', '  C')
1005
            nl = nl.replace(' rC', '  C')
1006
1007
            nl = nl.replace('RG5', '  G')
1008
            nl = nl.replace('RG3', '  G')
1009
            nl = nl.replace('GUA', '  G')
1010
            nl = nl.replace(' RG', '  G')
1011
            nl = nl.replace(' rG', '  G')
1012
1013
            nl = nl.replace('RU5', '  U')
1014
            nl = nl.replace('RU3', '  U')
1015
            nl = nl.replace('URA', '  U')
1016
            nl = nl.replace('URI', '  U')
1017
            nl = nl.replace('URY', '  U')
1018
            nl = nl.replace(' RU', '  U')
1019
            nl = nl.replace(' rU', '  U')
1020
1021
            nl = nl.replace('RT5', '  T')
1022
            nl = nl.replace('RT3', '  T')
1023
            nl = nl.replace('THY', '  T')
1024
            nl = nl.replace(' RT', '  T')
1025
            nl = nl.replace(' rT', '  T')
1026
1027
            lines.append(nl)
1028
1029
        self.lines = lines
1030
1031
    def check_res_if_std_prot(self):
1032
        wrong = []
1033
        for r in self.res:
1034
            if r not in AMINOACID_CODES:
1035
                wrong.append(r)
1036
        return wrong
1037
1038
    def write(self, outfn='', verbose=False):
1039
        """Write ```self.lines``` to a file (and add END file)
1040
1041
        Args:
1042
            outfn (str): file to save, if outfn is '', then simply use self.fn
1043
            verbose (Boolen): be verbose or not
1044
1045
        Returns:
1046
            None
1047
1048
        """
1049
        if outfn == '':
1050
            outfn = self.fn
1051
        f = open(outfn, 'w')
1052
        # test if there is anything to write, if not, it's likely that the
1053
        # file is not a PDB file, e.g. .outCR
1054
        if not self.lines:
1055
            raise Exception('Nothing to write. Is the input a PDB file? ', self.fn)
1056
        for l in self.lines:
1057
            f.write(l + '\n')
1058
        if not l.startswith('END'):
0 ignored issues
show
introduced by
The variable l does not seem to be defined in case the for loop on line 1056 is not entered. Are you sure this can never be the case?
Loading history...
1059
            f.write('END')
1060
        f.close()
1061
        if verbose:
1062
            print('Write %s' % outfn)
1063
1064
    def get_atom_num(self, line):
1065
        """Extract atom number from a line of PDB file
1066
        Arguments:
1067
          * line = ATOM line from a PDB file
1068
        Output:
1069
          * atom number (int)
1070
        """
1071
        return int(''.join([x for x in line[6:11] if x.isdigit()]))
1072
1073
    def get_res_num(self, line):
1074
        """Extract residue number from a line of PDB file
1075
        Arguments:
1076
          * line = ATOM line from a PDB file
1077
        Output:
1078
          * residue number as an integer
1079
        """
1080
        return int(''.join([x for x in line[22:27] if x.isdigit()]))
1081
1082
    def get_res_code(self, line):
1083
        """Get residue code from a line of a PDB file
1084
        """
1085
        #if not line.startswith('ATOM'):
1086
        #    return None
1087
        return line[17:20]
1088
1089
    def shift_atom_names(self):
1090
        nlines = []
1091
        for l in self.lines:
1092
            if l.startswith('ATOM'):
1093
                atom_name = self.get_atom_code(l)
1094
                l = self.set_atom_code(l, atom_name)
1095
            nlines.append(l)
1096
        self.lines = nlines
1097
1098
    def prune_elements(self):
1099
        nlines = []
1100
        for l in self.lines:
1101
            if l.startswith('ATOM'):
1102
                l = l[:76] + ' ' + l[78:]
1103
            nlines.append(l)
1104
        self.lines = nlines
1105
1106
    def get_atom_code(self, line):
1107
        """Get atom code from a line of a PDB file
1108
        """
1109
        if not line.startswith('ATOM'):
1110
            return None
1111
        return line[12:16].replace(' ', '').strip()
1112
1113
    def get_atom_coords(self, line):
1114
        """Get atom coordinates from a line of a PDB file
1115
        """
1116
        if not line.startswith('ATOM'):
1117
            return None
1118
        return tuple(map(float, line[31:54].split()))
1119
1120
    def set_atom_coords(self, line, x, y, z):
1121
        """Get atom coordinates from a line of a PDB file
1122
        """
1123
        line = line[:30] + (f"{x:>8.3f}") + line[38:]
1124
        line = line[:38] + (f"{y:>8.3f}") + line[46:]
1125
        line = line[:46] + (f"{z:>8.3f}") + line[54:]
1126
        return  line
1127
1128
    def set_line_bfactor(self, line, bfactor):
1129
        if not line.startswith('ATOM'):
1130
            return None
1131
        return line[:60] + (" %5.2f" % bfactor) + line[66:]
1132
1133
    def set_atom_occupancy(self, line, occupancy):
1134
        """set occupancy for line"""
1135
        return line[:54] + (" %5.2f" % occupancy) + line[60:]
1136
1137
    def set_atom_code(self, line, code):
1138
        """Add atom name/code:
1139
        
1140
           ATOM      1  OP2   C A   1      29.615  36.892  42.657  1.00  1.00          O
1141
                        ^^^                                                            ^ and element
1142
        """
1143
        return line[:12] + ' ' + code + ' ' * (3 - len(code)) + line[16:77] + code[0] + line[78:]
1144
1145
    def set_res_code(self, line, code):
1146
        """
1147
        Args:
1148
            lines
1149
            code
1150
        path (str): The path of the file to wrap
1151
        field_storage (FileStorage): The :class:Y instance to wrap
1152
            temporary (bool): Whether or not to delete the file when the File
1153
            instance is destructed
1154
1155
        Returns:
1156
            BufferedFileStorage: A buffered writable file descriptor
1157
        """
1158
        return line[:17] + code.rjust(3) + line[20:]
1159
1160
    def get_chain_id(self, line):
1161
        return line[21:22]
1162
1163
    def get_all_chain_ids(self):
1164
        """
1165
        Returns:
1166
           set: chain ids, e.g. set(['A', 'B'])
1167
        """
1168
        chain_ids = []
1169
        for l in self.lines:
1170
            chain_id = self.get_chain_id(l)
1171
            if chain_id and chain_id not in chain_ids:
1172
                chain_ids.append(chain_id)
1173
        return chain_ids
1174
1175
    def get_atom_index(self, line):
1176
        try:
1177
            return int(line[6:11])
1178
        except:
1179
            return None
1180
1181
    def set_atom_index(self, line, index):
1182
        return line[:6] + str(index).rjust(5) + line[11:]
1183
1184
    def reindex_atom_index(self):
1185
        self.nlines = []
1186
        for i, l in enumerate(self.lines):
1187
            nl = self.set_atom_index(l, i + 1)  # start from 1
1188
            self.nlines.append(nl)
1189
        self.lines = self.nlines
1190
        return self.nlines
1191
1192
    def get_res_index(self, line):
1193
        return int(line[22:26])
1194
1195
    def set_res_index(self, line, index):
1196
        return line[:23] + str(index).rjust(3) + line[26:]
1197
1198
    def set_chain_id(self, line, chain_id):
1199
        return line[:21] + chain_id + line[22:]
1200
1201
    def get_rnapuzzle_ready(self, renumber_residues=True, fix_missing_atoms=True,
1202
                            rename_chains=True,
1203
                            ignore_op3 = False,
1204
                            report_missing_atoms=True, keep_hetatm=False, backbone_only=False,
1205
                            no_backbone = False, bases_only = False, save_single_res = False,
1206
                            ref_frame_only = False,
1207
                            check_geometry = False,
1208
                            verbose=False):  # :, ready_for="RNAPuzzle"):
1209
        """Get rnapuzzle (SimRNA) ready structure.
1210
1211
        Clean up a structure, get current order of atoms.
1212
1213
        :param renumber_residues: boolean, from 1 to ..., second chain starts from 1 etc.
1214
        :param fix_missing_atoms: boolean, superimpose motifs from the minilibrary and copy-paste missing atoms, this is super crude, so should be used with caution.
1215
1216
        Submission format @http://ahsoka.u-strasbg.fr/rnapuzzles/
1217
1218
        Run :func:`rna_tools.rna_tools.lib.RNAStructure.std_resn` before this function to fix names.
1219
        
1220
        .. image:: ../pngs/rebuild_op1op2_backbone.png
1221
        Figure: (Starting from left) input structure, structure with rebuilded atoms, and reference. The B fragment is observed in the reference used here as a “benchmark”, fragment A is reconstructed atoms (not observed in the reference"). 201122
1222
1223
        - 170305 Merged with get_simrna_ready and fixing OP3 terminal added
1224
        - 170308 Fix missing atoms for bases, and O2'
1225
1226
        .. image:: ../pngs/fix_missing_o_before_after.png
1227
        Fig. Add missing O2' atom (before and after).
1228
1229
        .. image:: ../pngs/fix_missing_superposition.png
1230
        Fig. The residue to fix is in cyan. The G base from the library in red. Atoms O4', C2', C1' are shared between the sugar (in cyan) and the G base from the library (in red). These atoms are used to superimpose the G base on the sugar, and then all atoms from the base are copied to the residues.
1231
1232
        .. image:: ../pngs/fix_missing_bases.png
1233
        **Fig.** Rebuild ACGU base-less. It's not perfect but good enough for some applications.
1234
1235
        .. warning:: It was only tested with the whole base missing!
1236
1237
        .. warning:: requires: Biopython
1238
1239
        Selection of atoms:
1240
        
1241
        - posphate group (3x, OP1 ,P, OP2),
1242
        - connector (2x O5', C5'), /5x
1243
        - sugar (5x, C4', O4', C3', O3', C1', C2'), /10
1244
        - extra oxygens from sugar (2x, O2' O3'), for now it's /12!
1245
        - A (10x), G (11x), C (8x), U(8x), max 12+11=23
1246
1247
        And 27 unique atoms: {'N9', 'O2', 'OP2', "O2'", "O4'", 'C8', "O3'", "C1'", 'C2', 'C6', "C5'", 'N6', 'C5', "C4'", 'C4', "O5'", "C3'", 'O6', 'N2', 'N7', 'OP1', 'N1', 'N4', 'P', "C2'", 'N3', 'O4'}.
1248
        """
1249
1250
        if verbose:
1251
            logger.setLevel(logging.DEBUG)
1252
1253
        try:
1254
            from Bio import PDB
1255
            from Bio.PDB import PDBIO
1256
            import warnings
1257
            warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
1258
            warnings.filterwarnings('ignore', '.*with given element *',)
1259
        except:
1260
            sys.exit('Error: Install biopython to use this function (pip biopython)')
1261
1262
        import copy
1263
        # for debugging
1264
        #renumber_residues = True
1265
        # if ready_for == "RNAPuzzle":
1266
1267
        if backbone_only:
1268
            G_ATOMS = "P OP1 OP2 O5' C5'".split()
1269
            A_ATOMS = "P OP1 OP2 O5' C5'".split()
1270
            U_ATOMS = "P OP1 OP2 O5' C5'".split()
1271
            C_ATOMS = "P OP1 OP2 O5' C5'".split()
1272
            #G_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1'".split()
1273
            #A_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1'".split()
1274
            #U_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1'".split()
1275
            #C_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1'".split()
1276
1277
        elif ref_frame_only:
1278
            G_ATOMS = "P OP1 OP2".split()
1279
            A_ATOMS = "P OP1 OP2".split()
1280
            U_ATOMS = "P OP1 OP2".split()
1281
            C_ATOMS = "P OP1 OP2".split()
1282
            
1283
        elif no_backbone:
1284
            G_ATOMS = "C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split() 
1285
            A_ATOMS = "C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split()     
1286
            U_ATOMS = "C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split()
1287
            C_ATOMS = "C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split()
1288
1289
        elif bases_only:
1290
            G_ATOMS = "N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split()
1291
            A_ATOMS = "N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split()
1292
            U_ATOMS = "N1 C2 O2 N3 C4 O4 C5 C6".split()
1293
            C_ATOMS = "N1 C2 O2 N3 C4 N4 C5 C6".split()
1294
        else:
1295
            G_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split() # 23 
1296
            A_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split()    # 22
1297
            U_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split()          # 20
1298
            C_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split()          # 20
1299
1300
        # hmm.. is it the same as RNApuzzle???
1301
        # if ready_for == "SimRNA":
1302
        #    G_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split()
1303
        #    A_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split()
1304
        #    U_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split()
1305
        #    C_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split()
1306
1307
        tf = tempfile.NamedTemporaryFile(delete=False)
1308
        ftmp = tf.name
1309
        self.write(ftmp, verbose=False)
1310
1311
        parser = PDB.PDBParser()
1312
        #try:
1313
        struct = parser.get_structure('', ftmp)
1314
        #except:
1315
        #    print('Error in ' + self.fn)
1316
        #    sys.exit(1)
1317
        model = struct[0]
1318
1319
        s2 = PDB.Structure.Structure(struct.id)
1320
        m2 = PDB.Model.Model(model.id)
1321
1322
        chains2 = []
1323
1324
        missing = []
1325
        fixed = []
1326
        protein_chains_remmoved = []
1327
1328
        new_chains = list(string.ascii_uppercase)
1329
        remarks = []
1330
        if ignore_op3:
1331
             remarks.append("REMARK 250  don't add OP3 at the start of chain")
1332
        # get path to mini db with models to rebuilt structures
1333
        currfn = __file__
1334
        if currfn == '':
1335
            path = '.'
1336
        else:
1337
            path = os.path.dirname(currfn)
1338
        if os.path.islink(currfn):  # path + os.sep + os.path.basename(__file__)):
1339
            path = os.path.dirname(os.readlink(
1340
                path + os.sep + os.path.basename(currfn)))
1341
1342
        # for each chain #
1343
        for chain in model.get_list():
1344
            logger.debug('chain: %s' % chain)
1345
1346
            # is it RNA? ############################
1347
            protein_like = 0
1348
            for c, r in enumerate(chain, 1):
1349
                if r.resname in AMINOACID_CODES:
1350
                    protein_like += 1
1351
            if (protein_like / float(c + 1)) > .8:  # 90%
0 ignored issues
show
introduced by
The variable c does not seem to be defined in case the for loop on line 1343 is not entered. Are you sure this can never be the case?
Loading history...
1352
                protein_chains_remmoved.append(chain.get_id())
1353
            # ######################################
1354
1355
            res = []
1356
            for r in chain:
1357
                res.append(r)
1358
1359
            res = copy.copy(res)
1360
1361
            # start chains from A..BCD. etc
1362
1363
            if rename_chains:
1364
                try:
1365
                    chain.id = new_chains.pop(0)
1366
                except ValueError:
1367
                    # ValueError: Cannot change id from `A` to `A`.
1368
                    # The id `A` is already used for a sibling of this entity.
1369
                    # so keep it as it was
1370
                    pass
1371
1372
            c2 = PDB.Chain.Chain(chain.id)
1373
1374
            c = 1  # new chain, goes from 1 !!! if renumber True
1375
            prev_r = '' # init prev_r 
1376
1377
            for r in res:
1378
                #
1379
                #  deal with heteratm
1380
                #
1381
                if r.id[0].startswith('H_') or r.id[0].startswith('W'):
1382
                    if keep_hetatm:
1383
                        c2.add(r)
1384
                    else:
1385
                        # str(r) % r.id[0].replace('H_', '').strip())
1386
                        remarks.append('REMARK 250 Hetatom to be removed from file: ' + str(r))
1387
                    continue
1388
                # hack for amber/qrna
1389
                r.resname = r.resname.strip()
1390
                if r.resname == 'RC3':
1391
                    r.resname = 'C'
1392
                if r.resname == 'RU3':
1393
                    r.resname = 'U'
1394
                if r.resname == 'RG3':
1395
                    r.resname = 'G'
1396
                if r.resname == 'RA3':
1397
                    r.resname = 'A'
1398
1399
                if r.resname == 'C3':
1400
                    r.resname = 'C'
1401
                if r.resname == 'U3':
1402
                    r.resname = 'U'
1403
                if r.resname == 'G3':
1404
                    r.resname = 'G'
1405
                if r.resname == 'A3':
1406
                    r.resname = 'A'
1407
1408
                if r.resname == 'RC5':
1409
                    r.resname = 'C'
1410
                if r.resname == 'RU5':
1411
                    r.resname = 'U'
1412
                if r.resname == 'RG5':
1413
                    r.resname = 'G'
1414
                if r.resname == 'RA5':
1415
                    r.resname = 'A'
1416
1417
                if r.resname == 'C5':
1418
                    r.resname = 'C'
1419
                if r.resname == 'U5':
1420
                    r.resname = 'U'
1421
                if r.resname == 'G5':
1422
                    r.resname = 'G'
1423
                if r.resname == 'A5':
1424
                    r.resname = 'A'
1425
1426
                if r.resname.strip() == 'RC':
1427
                    r.resname = 'C'
1428
                if r.resname.strip() == 'RU':
1429
                    r.resname = 'U'
1430
                if r.resname.strip() == 'RG':
1431
                    r.resname = 'G'
1432
                if r.resname.strip() == 'RA':
1433
                    r.resname = 'A'
1434
1435
                # unmodified rna 2MG -> G and take only G atoms
1436
                if (r.resname.strip() not in ['C', 'U', 'G', 'A']) and \
1437
                        (r.resname.strip()[-1] in ['C', 'U', 'G', 'A']):
1438
                    r.resname = r.resname.strip()[-1].strip()
1439
1440
                r2 = PDB.Residue.Residue(r.id, r.resname.strip(), r.segid)
1441
                if renumber_residues:
1442
                    r2.id = (r2.id[0], c, r2.id[2])  # renumber residues
1443
                #
1444
                # experimental: fixing missing OP3.
1445
                # Only for the first residues.
1446
                #
1447
                if c == 1:
1448
                    # if p_missing
1449
                    p_missing = True
1450
                    # if p_missing:
1451
                    #    try:
1452
                    #        x = r["O5'"]
1453
                    #        x.id =       ' P'
1454
                    #        x.name =     ' P'
1455
                    #        x.fullname = ' P'
1456
                    #        print "REMARK 000 FIX O5' -> P fix in chain ", chain.id
1457
                    #    except:
1458
                    #        pass
1459
                    for a in r:
1460
                        if a.id == 'P':
1461
                            p_missing = False
1462
                    logger.debug('p_missing %s' % p_missing)
1463
1464
                    if p_missing and fix_missing_atoms:
1465
                        currfn = __file__
1466
                        if currfn == '':
1467
                            path = '.'
1468
                        else:
1469
                            path = os.path.dirname(currfn)
1470
                        if os.path.islink(currfn):  # path + os.sep + os.path.basename(__file__)):
1471
                            path = os.path.dirname(os.readlink(
1472
                                path + os.sep + os.path.basename(currfn)))
1473
1474
                        po3_struc = PDB.PDBParser().get_structure('', path + '/data/PO3_inner.pdb')
1475
                        po3 = [po3_atom for po3_atom in po3_struc[0].get_residues()][0]
1476
1477
                        r_atoms = [r["O4'"], r["C4'"], r["C3'"]]
1478
                        po3_atoms = [po3["O4'"], po3["C4'"], po3["C3'"]]
1479
1480
                        sup = PDB.Superimposer()
1481
                        sup.set_atoms(r_atoms, po3_atoms)
1482
                        rms = round(sup.rms, 3)
1483
1484
                        sup.apply(po3_struc.get_atoms())  # to all atoms of po3
1485
1486
                        r.add(po3['P'])
1487
                        r.add(po3['OP1'])
1488
                        r.add(po3['OP2'])
1489
                        try:
1490
                            r.add(po3["O5'"])
1491
                        except:
1492
                            del r["O5'"]
1493
                            r.add(po3["O5'"])
1494
1495
                        fixed.append(['add OP3 at the beginning of the chain ', chain.id, r, r.id[1]])
1496
1497
                    p_missing = False  # off this function
1498
1499
                    # save it
1500
                    #io = PDB.PDBIO()
1501
                    #io.set_structure( po3_struc )
1502
                    # io.save("po3.pdb")
1503
1504
                #
1505
                # fix missing O2'
1506
                #
1507
                o2p_missing = True
1508
                for a in r:
1509
                    logger.debug('o2p_missing: %s %s %s' % (r, o2p_missing, a.id))
1510
                    if a.id == "O2'":
1511
                        o2p_missing = False
1512
                logger.debug('o2p_missing: %s', o2p_missing)
1513
1514
                if o2p_missing and fix_missing_atoms:
1515
                    currfn = __file__
1516
                    if currfn == '':
1517
                        path = '.'
1518
                    else:
1519
                        path = os.path.dirname(currfn)
1520
                    if os.path.islink(currfn):  # path + os.sep + os.path.basename(__file__)):
1521
                        path = os.path.dirname(os.readlink(
1522
                            path + os.sep + os.path.basename(currfn)))
1523
1524
                    o2p_struc = PDB.PDBParser().get_structure('', path + '/data/o2prim.pdb')
1525
                    o2p = [o2p_atom for o2p_atom in o2p_struc[0].get_residues()][0]
1526
1527
                    try:
1528
                        r_atoms = [r["C3'"], r["C2'"], r["C1'"]]
1529
                    except:
1530
                        ic(r)
1531
                    o2p_atoms = [o2p["C3'"], o2p["C2'"], o2p["C1'"]]
1532
1533
                    sup = PDB.Superimposer()
1534
                    sup.set_atoms(r_atoms, o2p_atoms)
1535
                    rms = round(sup.rms, 3)
1536
1537
                    sup.apply(o2p_struc.get_atoms())  # to all atoms of o2p
1538
1539
                    r.add(o2p["O2'"])
1540
                    logger.debug('fixing o2p for ' % r)
1541
                    fixed.append(['add O2\' ', chain.id, r, c])
1542
1543
                o2p_missing = False  # off this function
1544
                ######## fixing of missing OP1 and OP2 atoms in backbone ###########
1545
                if fix_missing_atoms:
1546
                    if 'OP1' not in r:
1547
                        if prev_r:
1548
                            op4 = PDB.PDBParser().get_structure('', path + '/data/op4.pdb')
1549
                            op4a = [a for a in op4[0].get_residues()][0]
1550
                            if 'P' not in r:
1551
                                print("Error missing P of residue " + chain.id +  ':' + str(r.id[1]) + ". Sorry, I can't rebuild missing atoms!")
1552
                                exit()
1553
                            r_atoms = [r["O5'"], r["P"], prev_r["O3'"]] #  r["C5'"], 
1554
                            op4_atoms = [op4a["O5'"], op4a["P"], op4a["O3'"]] # op4a["C5'"]] #, 
1555
1556
                            sup = PDB.Superimposer()
1557
                            sup.set_atoms(r_atoms, op4_atoms)
1558
                            sup.apply(op4.get_atoms())
1559
1560
                            r.add(op4a["OP1"])
1561
                            r.add(op4a["OP2"])
1562
                            fixed.append(['add the missing OP1 and OP2', chain.id, r, r.id[1]])
1563
1564
                        else:
1565
                            remarks.append("REMARK 250 Warning: Can't fix missing OP1 or/and OP2 atoms of " + str(r))
1566
                #
1567
                # fix missing C (the whole base at the moment)
1568
                #
1569 View Code Duplication
                if str(r.get_resname()).strip() == "C" and fix_missing_atoms:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1570
                    for a in r:
1571
                        if a.id == "N1":
1572
                            break
1573
                    else:
1574
                        C_struc = PDB.PDBParser().get_structure('', path + '/data/C.pdb')
1575
                        C = [C_atom for C_atom in C_struc[0].get_residues()][0]
1576
1577
                        r_atoms = [r["O4'"], r["C2'"], r["C1'"]]
1578
                        C_atoms = [C["O4'"], C["C2'"], C["C1'"]]
1579
1580
                        sup = PDB.Superimposer()
1581
                        sup.set_atoms(r_atoms, C_atoms)
1582
                        rms = round(sup.rms, 3)
1583
1584
                        sup.apply(C_struc.get_atoms())  # to all atoms of C
1585
1586
                        r.add(C["N1"])
1587
                        r.add(C["C2"])
1588
                        r.add(C["O2"])
1589
                        r.add(C["N3"])
1590
                        r.add(C["C4"])
1591
                        r.add(C["N4"])
1592
                        r.add(C["C5"])
1593
                        r.add(C["C6"])
1594
1595
                        fixed.append(['add the whole base C', chain.id, r, r.id[1]])
1596
1597
                #
1598
                # fix missing U (the whole base at the moment)
1599
                #
1600 View Code Duplication
                if str(r.get_resname()).strip() == "U" and fix_missing_atoms:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1601
                    for a in r:
1602
                        if a.id == "N1":
1603
                            break
1604
                    else:
1605
                        U_struc = PDB.PDBParser().get_structure('', path + '/data/U.pdb')
1606
                        U = [U_atom for U_atom in U_struc[0].get_residues()][0]
1607
1608
                        r_atoms = [r["O4'"], r["C2'"], r["C1'"]]
1609
                        U_atoms = [U["O4'"], U["C2'"], U["C1'"]]
1610
1611
                        sup = PDB.Superimposer()
1612
                        sup.set_atoms(r_atoms, U_atoms)
1613
                        rms = round(sup.rms, 3)
1614
1615
                        sup.apply(U_struc.get_atoms())  # to all atoms of U
1616
1617
                        r.add(U["N1"])
1618
                        r.add(U["C2"])
1619
                        r.add(U["O2"])
1620
                        r.add(U["N3"])
1621
                        r.add(U["C4"])
1622
                        r.add(U["O4"])
1623
                        r.add(U["C5"])
1624
                        r.add(U["C6"])
1625
1626
                        fixed.append(['add the whole base U', chain.id, r, r.id[1]])
1627
                #
1628
                # fix missing G (the whole base at the moment)
1629
                #
1630 View Code Duplication
                if str(r.get_resname()).strip() == "G" and fix_missing_atoms:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1631
                    for a in r:
1632
                        if a.id == "N1":
1633
                            break
1634
                    else:
1635
                        G_struc = PDB.PDBParser().get_structure('', path + '/data/G.pdb')
1636
                        G = [G_atom for G_atom in G_struc[0].get_residues()][0]
1637
1638
                        r_atoms = [r["O4'"], r["C2'"], r["C1'"]]
1639
                        G_atoms = [G["O4'"], G["C2'"], G["C1'"]]
1640
1641
                        sup = PDB.Superimposer()
1642
                        sup.set_atoms(r_atoms, G_atoms)
1643
                        rms = round(sup.rms, 3)
1644
1645
                        sup.apply(G_struc.get_atoms())  # to all atoms of G
1646
1647
                        r.add(G["N9"])
1648
                        r.add(G["C8"])
1649
                        r.add(G["N7"])
1650
                        r.add(G["C5"])
1651
                        r.add(G["C6"])
1652
                        r.add(G["O6"])
1653
                        r.add(G["N1"])
1654
                        r.add(G["C2"])
1655
                        r.add(G["N2"])
1656
                        r.add(G["N3"])
1657
                        r.add(G["C4"])
1658
1659
                        fixed.append(['add the whole base G', chain.id, r, r.id[1]])
1660
                #
1661
                # fix missing A (the whole base at the moment)
1662
                #
1663 View Code Duplication
                if str(r.get_resname()).strip() == "A" and fix_missing_atoms:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1664
                    for a in r:
1665
                        if a.id == "N1":
1666
                            break
1667
                    else:
1668
                        A_struc = PDB.PDBParser().get_structure('', path + '/data/A.pdb')
1669
                        A = [A_atom for A_atom in A_struc[0].get_residues()][0]
1670
1671
                        r_atoms = [r["O4'"], r["C2'"], r["C1'"]]
1672
                        A_atoms = [A["O4'"], A["C2'"], A["C1'"]]
1673
1674
                        sup = PDB.Superimposer()
1675
                        sup.set_atoms(r_atoms, A_atoms)
1676
                        rms = round(sup.rms, 3)
1677
1678
                        sup.apply(A_struc.get_atoms())  # to all atoms of A
1679
1680
                        r.add(A["N9"])
1681
                        r.add(A["C8"])
1682
                        r.add(A["N7"])
1683
                        r.add(A["C5"])
1684
                        r.add(A["C6"])
1685
                        r.add(A["N6"])
1686
                        r.add(A["N1"])
1687
                        r.add(A["C2"])
1688
                        r.add(A["N3"])
1689
                        r.add(A["C4"])
1690
1691
                        fixed.append(['add the whole base A', chain.id, r, r.id[1]])
1692
1693
                #
1694
                # strip residues of extra atoms, not in G_ATOMS in this case
1695
                #
1696
                if str(r.get_resname()).strip() == "G":
1697
                    for an in G_ATOMS:
1698
                        if c == 1 and ignore_op3:
1699
                            if an in ['P', 'OP1', 'OP2']:
1700
                                continue
1701
                        try:
1702
                            if c == 1 and an == "O5'" and p_missing:
0 ignored issues
show
introduced by
The variable p_missing does not seem to be defined for all execution paths.
Loading history...
1703
                                r2.add(x)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable x does not seem to be defined.
Loading history...
1704
                            else:
1705
                                r2.add(r[an])
1706
                        except KeyError:
1707
                            # print 'Missing:', an, r, ' new resi', c
1708
                            missing.append([an, chain.id, r, r.id[1]])
1709
                    c2.add(r2)
1710
1711
                elif str(r.get_resname()).strip() == "A":
1712
                    for an in A_ATOMS:
1713
                        if c == 1 and ignore_op3:
1714
                            if an in ['P', 'OP1', 'OP2']:
1715
                                continue
1716
                        try:
1717
                            if c == 1 and an == "O5'" and p_missing:
1718
                                r2.add(x)
1719
                            else:
1720
                                r2.add(r[an])
1721
                        except KeyError:
1722
                            # print 'Missing:', an, r, ' new resi', c
1723
                            missing.append([an, chain.id, r, r.id[1]])
1724
                    c2.add(r2)
1725
1726
                elif str(r.get_resname()).strip() == "C":
1727
                    for an in C_ATOMS:
1728
                        if c == 1 and ignore_op3:
1729
                            if an in ['P', 'OP1', 'OP2']:
1730
                                continue
1731
                        try:
1732
                            if c == 1 and an == "O5'" and p_missing:
1733
                                r2.add(x)
1734
                            else:
1735
                                r2.add(r[an])
1736
                        except:
1737
                            # print 'Missing:', an, r, ' new resi', c
1738
                            missing.append([an, chain.id, r, r.id[1]])
1739
                    c2.add(r2)
1740
1741
                elif str(r.get_resname()).strip() == "U":
1742
                    for an in U_ATOMS:
1743
                        if c == 1 and ignore_op3:
1744
                            if an in ['P', 'OP1', 'OP2']:
1745
                                continue
1746
                        try:
1747
                            if c == 1 and an == "O5'" and p_missing:
1748
                                r2.add(x)
1749
                            else:
1750
                                r2.add(r[an])
1751
                        except KeyError:
1752
                            # print 'Missing:', an, r,' new resi', c
1753
                            missing.append([an, chain.id, r, r.id[1]])
1754
                    c2.add(r2)
1755
1756
1757
                # save each residue a sa single ife
1758
                if save_single_res:
1759
                    from Bio.PDB import Structure
1760
                    # Create an empty structure
1761
                    structure = Structure.Structure("my_structure")
1762
                    model = Model.Model(0)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable Model does not seem to be defined.
Loading history...
1763
                    chain = Chain.Chain('A')
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable Chain does not seem to be defined.
Loading history...
1764
                    chain.add(r2)
1765
                    model.add(chain)
1766
                    structure.add(model)
1767
1768
                    io = PDBIO()
1769
                    io.set_structure(structure)
1770
                    io.save(f"{self.fn}_{r2.id[1]}.pdb")
1771
1772
                prev_r = r  # hack to keep preview residue to be used in the function
1773
                            # e.g., to get an atom from this residue
1774
                c += 1
1775
            chains2.append(c2)
1776
1777
        io = PDBIO()
1778
        s2.add(m2)
1779
        for chain2 in chains2:
1780
            m2.add(chain2)
1781
        # print c2
1782
        # print m2
1783
        io.set_structure(s2)
1784
1785
        tf = tempfile.NamedTemporaryFile(delete=False)
1786
        fout = tf.name
1787
        io.save(fout)
1788
1789
        if fixed:
1790
            remarks.append('REMARK 250 Fixed atoms/residues:')
1791
            for i in fixed:
1792
                remarks.append(
1793
                    ' '.join(['REMARK 250  -', str(i[0]), 'in chain:', str(i[1]), str(i[2]), 'residue #', str(i[3])]))
1794
1795
        if missing and report_missing_atoms:
1796
            remarks.append('REMARK 250 Missing atoms:')
1797
            for i in missing:
1798
                remarks.append(' '.join(['REMARK 250   +', str(i[0]),
1799
                                         'chain:', str(i[1]), str(i[2]), 'residue #', str(i[3])]))
1800
            #raise Exception('Missing atoms in %s' % self.fn)
1801
1802
        if protein_chains_remmoved:
1803
            remarks.append('REMARK 250 Chains that seem to be proteins removed and : ' +
1804
                           ' '.join(protein_chains_remmoved))
1805
        #
1806
1807
        # fix ter 'TER' -> TER    1528        G A  71
1808
        #
1809
        s = RNAStructure(fout)
1810
        if check_geometry:
1811
            s.check_geometry()
1812
        self.lines = s.lines
1813
        c = 0
1814
        # ATOM   1527  C4    G A  71       0.000   0.000   0.000  1.00  0.00           C
1815
        nlines = []
1816
        no_ters = 0
1817
        for l in self.lines:
1818
            ## align atoms to the left #######################################################
1819
            # ATOM   3937    P   C B 185      11.596  -7.045  26.165  1.00  0.00           P
1820
            # ATOM   3937  P     C B 185      11.596  -7.045  26.165  1.00  0.00           P
1821
            if l.startswith('ATOM'):
1822
                atom_code = self.get_atom_code(l)
1823
                l = self.set_atom_code(l, atom_code)
1824
            ##################################################################################
1825
            if l.startswith('TER'):
1826
                #                pass  # leave it for now this
1827
                atom_l = self.lines[c - 1]
1828
                new_l = 'TER'.ljust(80)   # TER    1528        G A  71 <<<'
1829
                new_l = self.set_atom_index(new_l, str(self.get_atom_index(atom_l) + 1 + no_ters))
1830
                new_l = self.set_res_code(new_l, self.get_res_code(atom_l))
1831
                new_l = self.set_chain_id(new_l, self.get_chain_id(atom_l))
1832
                new_l = self.set_res_index(new_l, self.get_res_index(atom_l))
1833
                nlines.append(new_l)
1834
                #nlines.append(l)
1835
                no_ters += 1
1836
            else:
1837
                if self.get_atom_index(l):
1838
                    l = self.set_atom_index(l, self.get_atom_index(
1839
                        l) + no_ters)  # 1 ter +1 2 ters +2 etc
1840
                nlines.append(l)
1841
            c += 1
1842
        self.lines = nlines
1843
        return remarks
1844
1845
    def set_occupancy_atoms(self, occupancy):
1846
        """
1847
        :param occupancy:
1848
        """
1849
        nlines = []
1850
        for l in self.lines:
1851
            if l.startswith('ATOM'):
1852
                l = self.set_atom_occupancy(l, 0.00)
1853
                nlines.append(l)
1854
            else:
1855
                nlines.append(l)
1856
        self.lines = nlines
1857
1858
    def edit_occupancy_of_pdb(txt, pdb, pdb_out, v=False):
1859
        """Make all atoms 1 (flexi) and then set occupancy 0 for seletected atoms.
1860
        Return False if error. True if OK
1861
        """
1862
        struc = PDB.PDBParser().get_structure('struc', pdb)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable PDB does not seem to be defined.
Loading history...
1863
1864
        txt = txt.replace(' ', '')
1865
        if v:
1866
            print (txt)
1867
        l = re.split('[,:;]', txt)
1868
        if v:
1869
            print (l)
1870
1871
        for s in struc:
1872
            for c in s:
1873
                for r in c:
1874
                    for a in r:
1875
                        a.set_occupancy(1)  # make it flaxi
1876
1877
        for i in l:  # ['A', '1-10', '15', '25-30', 'B', '1-10']
1878
1879
            if i in string.ascii_letters:
1880
                if v:
1881
                    print('chain', i)
1882
                chain_curr = i
1883
                continue
1884
1885
            if i.find('-') > -1:
1886
                start, ends = i.split('-')
1887
                if start > ends:
1888
                    print('Error: range start > end ' + i)  # >>sys.stderr
1889
                    return False
1890
                index = list(range(int(start), int(ends) + 1))
1891
            else:
1892
                index = [int(i)]
1893
1894
            for i in index:
1895
                # change b_factor
1896
                try:
1897
                    atoms = struc[0][chain_curr][i]
0 ignored issues
show
introduced by
The variable chain_curr does not seem to be defined in case the for loop on line 1877 is not entered. Are you sure this can never be the case?
Loading history...
1898
                except KeyError:
1899
                    if i == chain_curr:
1900
                        print('Error: Chain ' + chain_curr +
1901
                              ' not found in the PDB structure')  # >>sys.stderr,
1902
                    else:
1903
                        print('Error: Residue ' + chain_curr + ':' + str(i) +
1904
                              ' found in the PDB structure')  # >>sys.stderr,
1905
                        return False
1906
                for a in atoms:
1907
                    a.set_occupancy(0)
1908
1909
        io = PDBIO()
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable PDBIO does not seem to be defined.
Loading history...
1910
        io.set_structure(struc)
1911
        io.save(pdb_out)
1912
        print('Saved ', pdb_out)
1913
        return True
1914
1915
    def view(self):
1916
        os.system('pymol ' + self.fn)
1917
1918
    def remove(self, verbose):
1919
        """Delete file, self.fn"""
1920
        os.remove(self.fn)
1921
        if verbose:
1922
            'File %s removed' % self.fn
1923
1924
    def __repr__(self):
1925
        return 'RNAStructure %s' % self.fn
1926
1927
1928
    def get_remarks_text(self):
1929
        """Get remarks as text for given file.
1930
        This function re-open files, as define as self.fn to get remarks.
1931
1932
        Example::
1933
1934
            r = RNAStructure(fout)
1935
            remarks = r.get_remarks_txt()
1936
            r1 = r.get_res_txt('A', 1)
1937
            r2 = r.get_res_txt('A', 2)
1938
            r3 = r.get_res_txt('A', 3)
1939
            with open(fout, 'w') as f:
1940
                f.write(remarks)
1941
                f.write(r1)
1942
                f.write(r2)
1943
                f.write(r3)
1944
1945
            remarks is
1946
1947
            REMARK 250 Model edited with rna-tools
1948
            REMARK 250  ver 3.5.4+63.g4338516.dirty 
1949
            REMARK 250  https://github.com/mmagnus/rna-tools 
1950
            REMARK 250  Fri Nov 13 10:15:19 2020
1951
1952
        """
1953
        txt = ''
1954
        for l in open(self.fn):
1955
            if l.startswith("REMARK"):
1956
                txt += l
1957
        return txt
1958
1959
    def get_res_text(self, chain_id, resi):
1960
        """Get a residue of given resi of chain_id and return as a text
1961
1962
        Args:
1963
            chain_id (str): e.g., 'A'
1964
            resi (int): e.g., 1
1965
1966
        Returns:
1967
             txt: 
1968
1969
        Example::
1970
1971
            r = RNAStructure(fn)
1972
            print(r.get_res_txt('A', 1))
1973
1974
            ATOM      1  O5'   G A   1      78.080 -14.909  -0.104  1.00  9.24           O  
1975
            ATOM      2  C5'   G A   1      79.070 -15.499  -0.956  1.00  9.70           C  
1976
            ATOM      3  C4'   G A   1      78.597 -16.765  -1.648  1.00  9.64           C  
1977
            ATOM      4  O4'   G A   1      78.180 -17.761  -0.672  1.00  9.88           O  
1978
            (...)
1979
1980
        """
1981
        txt = ''
1982
        for l in self.lines:
1983
            if l.startswith("ATOM"):
1984
                if self.get_res_num(l) == resi and self.get_chain_id(l) == chain_id:
1985
                    txt += l + '\n'
1986
        return txt
1987
1988
    def mq(self, method="RASP", verbose=False):
1989
1990
        if method.lower() == "rasp":
1991
            from rna_tools.tools.mq.RASP import RASP
1992
            r = RASP.RASP()
1993
            return [float(i) for i in r.run(self.fn, potentials=['all'])]
1994
            # ['-42058.4', '466223', '-0.0902109', '0', '0', '0']
1995
1996
        if method.lower() == "dfire":
1997
            from rna_tools.tools.mq.Dfire import Dfire
1998
            r = Dfire.Dfire()
1999
            return r.run(self.fn, verbose)
2000
        
2001
        if method.lower() == "rna3dcnn":
2002
            from rna_tools.tools.mq.RNA3DCNN import RNA3DCNN
2003
            r = RNA3DCNN.RNA3DCNN()
2004
            return r.run(self.fn, verbose)
2005
2006
        if method.lower() == "qrna":
2007
            from rna_tools.tools.mq.QRNA import QRNA
2008
            r = QRNA.QRNA()
2009
            return r.run(self.fn, verbose)
2010
2011
        if method.lower() == "clashscore":
2012
            from rna_tools.tools.mq.ClashScore import ClashScore
2013
            r = ClashScore.ClashScore()
2014
            return r.run(self.fn, verbose)
2015
2016
        if method.lower() == "analyzegeometry":
2017
            from rna_tools.tools.mq.AnalyzeGeometry import AnalyzeGeometry
2018
            r = AnalyzeGeometry.AnalyzeGeometry()
2019
            return r.run(self.fn, verbose)
2020
2021
        raise MethodUnknown('Define corrent method')
2022
2023
    def get_empty_line(self):
2024
        l = "ATOM      1  C5'   A A   3      25.674  19.091   3.459  1.00  1.00           X"
2025
        return l
2026
    
2027
    def add_line(self, l):
2028
        self.lines.append(l)
2029
2030
def add_header(version=None):
2031
    now = time.strftime("%c")
2032
    txt = 'REMARK 250 Model edited with rna-tools\n'
2033
    txt += 'REMARK 250  ver %s \nREMARK 250  https://github.com/mmagnus/rna-tools \nREMARK 250  %s' % (
2034
        version, now)
2035
    return txt
2036
2037
2038
def edit_pdb(f, args):
2039
    """Edit your structure.
2040
2041
    The function can take ``A:3-21>A:1-19`` or even syntax like this
2042
    ``A:3-21>A:1-19,B:22-32>B:20-30`` and will do an editing.
2043
2044
    The output is printed, line by line. Only ATOM lines are edited!
2045
2046
    Examples::
2047
2048
        $ rna_pdb_tools.py --edit 'A:3-21>A:1-19' 1f27_clean.pdb > 1f27_clean_A1-19.pdb
2049
2050
    or even::
2051
2052
        $ rna_pdb_tools.py --edit 'A:3-21>A:1-19,B:22-32>B:20-30' 1f27_clean.pdb > 1f27_clean_renumb.pdb
2053
2054
    """
2055
    # open a new file
2056
    s = RNAStructure(f)
2057
    output = ''
2058
    if not args.no_hr:
2059
        output += add_header() + '\n'
2060
        output += 'REMARK 250    HEADER --edit ' + args.edit + '\n'
2061
2062
    # --edit 'A:3-21>A:1-19,B:22-32>B:20-30'
2063
    if args.edit.find(',') > -1:
2064
        # more than one edits
2065
        edits = args.edit.split(',')  # ['A:3-21>A:1-19', 'B:22-32>B:20-30']
2066
        selects = []
2067
        for e in edits:
2068
            selection_from, selection_to = select_pdb_fragment(
2069
                e.split('>')[0]), select_pdb_fragment(e.split('>')[1])
2070
            if len(selection_to) != len(selection_from):
2071
                raise Exception('len(selection_to) != len(selection_from)')
2072
            selects.append([selection_from, selection_to])
2073
    else:
2074
        # one edit
2075
        e = args.edit
2076
        selection_from, selection_to = select_pdb_fragment(
2077
            e.split('>')[0]), select_pdb_fragment(e.split('>')[1])
2078
        if len(selection_to) != len(selection_from):
2079
            raise Exception('len(selection_to) != len(selection_from)')
2080
        selects = [[selection_from, selection_to]]
2081
2082
    # go ever all edits: ['A:3-21>A:1-19','B:22-32>B:20-30']
2083
    for l in s.lines:
2084
        if l.startswith('ATOM'):
2085
                # get chain and resi
2086
            chain = l[21:22].strip()
2087
            resi = int(l[22:26].strip())
2088
2089
            if_selected_dont_print = False
2090
            # for selections
2091
            for select in selects:
2092
                selection_from, selection_to = select
2093
                if chain in selection_from:
2094
                    if resi in selection_from[chain]:
2095
                            # [1,2,3] mapping from [4,5,10], you want to know how to map 1
2096
                            # 1 is [0] element of first list, so you have to index first list
2097
                            # to get 0, with this 0 you can get 4 out of second list [4,5,10][0] -> 4
2098
                        nl = list(l)
2099
                        chain_new = list(selection_to.keys())[0]  # chain form second list
2100
                        nl[21] = chain_new  # new chain
2101
                        index = selection_from[chain].index(int(resi))  # get index of 1
2102
                        resi_new = str(selection_to[chain_new][index]).rjust(
2103
                            4)  # 'A' [1,2,3] -> '  1'
2104
                        nl[22:26] = resi_new
2105
                        nl = ''.join(nl)
2106
                        if_selected_dont_print = True
2107
                        output += nl + '\n'
2108
            if not if_selected_dont_print:
2109
                output += l + '\n'
2110
        else:  # if not atom
2111
            output += l + '\n'
2112
    return output
2113
2114
2115
def collapsed_view(args):
2116
    """Collapsed view of pdb file. Only lines with C5' atoms are shown and TER, MODEL, END.
2117
2118
    example::
2119
2120
        [mm] rna_tools git:(master) $ python rna-pdb-tools.py --cv input/1f27.pdb
2121
        ATOM      1  C5'   A A   3      25.674  19.091   3.459  1.00 16.99           C
2122
        ATOM     23  C5'   C A   4      19.700  19.206   5.034  1.00 12.65           C
2123
        ATOM     43  C5'   C A   5      14.537  16.130   6.444  1.00  8.74           C
2124
        ATOM     63  C5'   G A   6      11.726  11.579   9.544  1.00  9.81           C
2125
        ATOM     86  C5'   U A   7      12.007   7.281  13.726  1.00 11.35           C
2126
        ATOM    106  C5'   C A   8      12.087   6.601  18.999  1.00 12.74           C
2127
        TER"""
2128
    r = RNAStructure(args.file)
2129
    for l in r.lines:
2130
        at = r.get_atom_code(l)
2131
        if at == "P": # C5'":
2132
            print(l)
2133
        if l.startswith('TER') or l.startswith('MODEL') or l.startswith('END'):
2134
            print(l)
2135
2136
2137
def fetch(pdb_id, path="."):
2138
    """fetch pdb file from RCSB.org
2139
    https://files.rcsb.org/download/1Y26.pdb
2140
2141
    Args:
2142
    - pdb_id, but also a chain can be specified, 1jj2:A+B+C
2143
    
2144
    Returns:
2145
    - a path to a file
2146
2147
    TODO: na_pdb_tools.py --extract A:1-25+B:30-57 1jj2.pdb"""
2148
2149
    chains = ''
2150
    pdb_id = pdb_id.replace('_', ':') # to accept also 1jj2_A
2151
    if ':' in pdb_id:
2152
        pdb_id, chains = pdb_id.split(':') # >>> 'X:A+B+C'.split(':') ['X', 'A+B+C']
2153
2154
    if pdb_id == 'rp':
2155
        os.system('wget https://github.com/RNA-Puzzles/standardized_dataset/archive/master.tar.gz -O - | tar -xz')
2156
        return
2157
    
2158
    import urllib3
2159
    http = urllib3.PoolManager()
2160
2161
    # try:
2162
    pdb_id = pdb_id.replace('.pdb', '')
2163
    response = http.request('GET', 'https://files.rcsb.org/download/' + pdb_id + '.pdb')
2164
    if not response.status == 200:
2165
        raise PDBFetchError()
2166
2167
    # except urllib3.HTTPError:
2168
    #    raise Exception('The PDB does not exists: ' + pdb_id)
2169
    txt = response.data
2170
2171
    if path != '.':
2172
        npath = path + os.sep + pdb_id + '.pdb'
2173
    else:
2174
        npath = pdb_id + '.pdb'
2175
    print('downloading... ' + npath)
2176
    with open(npath, 'wb') as f:
2177
        f.write(txt)
2178
    if chains:
2179
        for chain in chains.split('+'):
2180
            cmd = f'rna_pdb_tools.py --extract-chain {chain} {pdb_id}.pdb > {pdb_id}_{chain}.pdb'
2181
            print(cmd)
2182
            exe(cmd)
2183
    # print('ok')
2184
    return npath
2185
2186
2187 View Code Duplication
def fetch_ba(pdb_id, path="."):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
2188
    """fetch biological assembly pdb file from RCSB.org
2189
2190
    >>> fetch_ba('1xjr')
2191
    ...
2192
    """
2193
    try:
2194
        import urllib3
2195
    except ImportError:
2196
        print('urllib3 is required')
2197
        return
2198
    http = urllib3.PoolManager()
2199
    # try:
2200
    response = http.request('GET', url='https://files.rcsb.org/download/' +
2201
                            pdb_id.lower() + '.pdb1')
2202
    if not response.status == 200:
2203
        raise PDBFetchError()
2204
    txt = response.data
2205
2206
    npath = path + os.sep + pdb_id + '_ba.pdb'
2207
    print('downloading...' + npath)
2208
    with open(npath, 'wb') as f:
2209
        f.write(txt)
2210
    print('ok')
2211
    return pdb_id + '_ba.pdb'
2212
2213
2214 View Code Duplication
def fetch_cif_ba(cif_id, path="."):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
2215
    """fetch biological assembly cif file from RCSB.org"""
2216
    import urrlib3
2217
    http = urllib3.PoolManager()
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable urllib3 does not seem to be defined.
Loading history...
2218
    # try:
2219
    response = http.request('GET', url='https://files.rcsb.org/download/' +
2220
                            cif_id.lower() + '-assembly1.cif')
2221
    if not response.status == 200:
2222
        raise PDBFetchError()
2223
    txt = response.data
2224
2225
    npath = path + os.sep + cif_id + '_ba.cif'
2226
    print('downloading...' + npath)
2227
    with open(npath, 'wb') as f:
2228
        f.write(txt)
2229
    print('ok')
2230
    return cif_id + '_ba.cif'
2231
2232
2233
def replace_chain(struc_fn, insert_fn, chain_id):
2234
    """Replace chain of the main file (struc_fn) with some new chain (insert_fn) of given chain id.
2235
2236
    Args:
2237
       struc_fn (str): path to the main PDB file
2238
       insert_fn (str): path to the file that will be injected in into the main PDB file
2239
       chain_id (str): chain that will be inserted into the main PDB file
2240
2241
    Returns:
2242
       string: text in the PDB format
2243
    """
2244
    struc = RNAStructure(struc_fn)
2245
    insert = RNAStructure(insert_fn)
2246
2247
    output = ''
2248
    inserted = False
2249
    for l in struc.lines:
2250
        if l.startswith('ATOM'):
2251
            chain = l[21]
2252
            if chain == chain_id:
2253
                if not inserted:
2254
                    for insertl in insert.lines:
2255
                        if not insertl.startswith('HEADER') and not insertl.startswith('END'):
2256
                            output += insertl + '\n'
2257
                    inserted = True
2258
                continue
2259
            # insert pdb
2260
            output += l + '\n'
2261
    return output.strip()
2262
2263
2264
def replace_atoms(struc_fn, insert_fn, verbose=False):
2265
    """Replace XYZ coordinate of the file (struc_fn)  with XYZ from another file (insert_fn).
2266
2267
    This can be useful if you want to replace positions of atoms, for example, one base only. The lines are muted based on atom name, residue name, chain, residue index (marked with XXXX below).::
2268
    
2269
    ATOM     11  N1    A 2  27     303.441 273.472 301.457  1.00  0.00           N   # file
2270
    ATOM      1  N1    A 2  27     300.402 273.627 303.188  1.00 99.99           N   # insert 
2271
    ATOM     11  N1    A 2  27     300.402 273.627 303.188  1.00  0.00           N   # inserted
2272
                 XXXXXXXXXXXXXXXX    # part used to find lines to be replaced
2273
2274
     ATOM      1  P     A 2  27     295.653 270.783 300.135  1.00119.29           P   # next line
2275
2276
    Args:
2277
       struc_fn (str): path to the main PDB file
2278
       insert_fn (str): path to the file that will be injected in into the main PDB file
2279
2280
    Returns:
2281
       string: text in the PDB format
2282
    """
2283
    struc = RNAStructure(struc_fn)
2284
    insert = RNAStructure(insert_fn)
2285
2286
    output = ''
2287
    for l in struc.lines:
2288
        inserted = False
2289
        if l.startswith('ATOM'):
2290
            idx = l[13:26]#.strip() # P     A 2  27
2291
            for li in insert.lines:
2292
                idxi = li[13:26]#.strip()
2293
                if idx == idxi:
2294
                    ln = l[:30] + li[30:54] + l[54:] + '\n' # only coords replace (!)
2295
                    if verbose:
2296
                        print(l)
2297
                        print(li)
2298
                        print(ln)
2299
                        print()
2300
                    inserted = True
2301
                    output += ln
2302
            if not inserted:
2303
                output += l + '\n'
2304
                inserted = False
2305
    return output.strip()
2306
2307
def set_chain_for_struc(struc_fn, chain_id, save_file_inplace=False, skip_ter=True):
2308
    """Quick & dirty function to set open a fn PDB format, set chain_id
2309
    and save it to a file. Takes only lines with ATOM and TER."""
2310
    txt = ''
2311
    for line in open(struc_fn):
2312
        if skip_ter:
2313
            if line.startswith('ATOM') or line.startswith('TER'):
2314
                # if TER line is only TER, not like TER atom chain etc
2315
                # then just keep TER there and don't add any chain to id
2316
                if line == 'TER':
2317
                    pass
2318
                else:
2319
                    l = line[:21] + chain_id + line[22:]
2320
                txt += l
0 ignored issues
show
introduced by
The variable l does not seem to be defined for all execution paths.
Loading history...
2321
            else:
2322
                txt += line
2323
        else:
2324
            if line.startswith('ATOM'):
2325
                l = line[:21] + chain_id + line[22:]
2326
                txt += l
2327
            elif line.startswith('TER'):
2328
                continue
2329
            else:
2330
                txt += line
2331
    if save_file_inplace:
2332
        with open(struc_fn, 'w') as f:
2333
             f.write(txt)
2334
    return txt.strip()
2335
2336
def load_rnas(path, verbose=True):
2337
    """Load structural files (via glob) and return a list of RNAStructure objects.
2338
    
2339
    Examples::
2340
2341
        rnas = rtl.load_rnas('../rna_tools/input/mq/*.pdb')
2342
    
2343
    """
2344
    rs = []
2345
    import glob
2346
    for f in glob.glob(path):
2347
        r = RNAStructure(f)
2348
        rs.append(r)
2349
    return rs
2350
2351
# main
2352
if '__main__' == __name__:
2353
    f1 = "input/t2-4-ACA.pdb"
2354
    f2 = "input/to-replace.pdb"
2355
    r1 = RNAStructure(f1)
2356
    r2 = RNAStructure(f2)    
2357
    t = replace_atoms(f1, f2)
2358
    with open('output/replaced.pdb', 'w') as f: f.write(t)
2359
2360
    fn = "input/1a9l_NMR_1_2_models.pdb"
2361
    r = RNAStructure(fn)
2362
    t = r.get_res_text('A', 1)
2363
    with open('output/1a9l_NMR_1_2_models_R1.pdb', 'w') as f: f.write(t)
2364
2365
    fn = "input/remarks.pdb"
2366
    r = RNAStructure(fn)
2367
    t = r.get_remarks_text()
2368
    with open('output/remarks_only.pdb', 'w') as f: f.write(t)
2369
2370
    fn = 'input/image'
2371
    print('fn:', fn)
2372
    struc = RNAStructure(fn)
2373
    print(' pdb?:', struc.is_pdb())
2374
    # print( atoms:', struc.get_no_lines())
2375
2376
    fn = 'input/na.pdb'
2377
    s = RNAStructure(fn)
2378
    print(s.detect_molecule_type())
2379
    #res = get_all_res(na)
2380
    # print 'what is?', what_is(res)
2381
    # print res
2382
    print('non standard:', s.check_res_if_std_na())
2383
    print('is protein:', s.detect_molecule_type())
2384
2385
    fn = 'input/prot.pdb'
2386
    s = RNAStructure(fn)
2387
    print('non standard:', s.check_res_if_std_prot())
2388
    print('is protein:',  s.detect_molecule_type())
2389
2390
    fn = 'input/rna-ru.pdb'
2391
    s = RNAStructure(fn)
2392
    print('non standard:', s.check_res_if_supid_rna())
2393
    print('is protein:', s.detect_molecule_type())
2394
2395
    fn = 'input/na_highAtomNum.pdb'
2396
    print(fn)
2397
    s = RNAStructure(fn)
2398
    s.renum_atoms()
2399
    s.write('output/na_highAtomNum.pdb')
2400
2401
    fn = 'input/na_solvet_old_format.pdb'
2402
    print(fn)
2403
    s = RNAStructure(fn)
2404
    s.fix_op_atoms()
2405
    s.remove_hydrogen()
2406
    s.remove_ion()
2407
    s.remove_water()
2408
    s.write('output/na_solvet_old_format.pdb')
2409
2410
    fn = 'input/na_solvet_old_format.pdb'
2411
    print(fn)
2412
    s = RNAStructure(fn)
2413
    s.std_resn()
2414
    s.remove_hydrogen()
2415
    s.remove_ion()
2416
    s.remove_water()
2417
    s.write('output/na_solvet_old_format.pdb')
2418
2419
    #fn = 'input/na_solvet_old_format__.pdb'
2420
    #s = RNAStructure(fn)
2421
    # s.std_resn()
2422
    # s.remove_hydrogen()
2423
    # s.remove_ion()
2424
    # s.remove_water()
2425
    # s.renum_atoms()
2426
    # s.fix_op_atoms()
2427
    # s.write('output/na_solvet_old_format__.pdb')
2428
2429
    fn = 'input/1xjr.pdb'
2430
    s.std_resn()
2431
    s.remove_hydrogen()
2432
    s.remove_ion()
2433
    s.remove_water()
2434
    s.renum_atoms()
2435
    s.fix_op_atoms()
2436
    s.write('output/1xjr.pdb')
2437
2438
    fn = 'input/decoy0165_amb.pdb'
2439
    print(fn)
2440
    s = RNAStructure(fn)
2441
    s.std_resn()
2442
    s.remove_hydrogen()
2443
    s.remove_ion()
2444
    s.remove_water()
2445
    s.renum_atoms()
2446
    s.fix_O_in_UC()
2447
    s.fix_op_atoms()
2448
    s.write('output/decoy0165_amb_clx.pdb')
2449
2450
    fn = 'input/farna.pdb'
2451
    print(fn)
2452
    s = RNAStructure(fn)
2453
    s.std_resn()
2454
    s.remove_hydrogen()
2455
    s.remove_ion()
2456
    s.remove_water()
2457
    s.fix_op_atoms()
2458
    s.renum_atoms()
2459
    s.write('output/farna.pdb')
2460
2461
    fn = 'input/farna.pdb'
2462
    print(fn)
2463
2464
    r = RNAStructure(fn)
2465
    print(r.is_mol2())
2466
2467
    if True:
2468
        print('================================================')
2469
        print ("input/1xjr_clx_fChimera_noIncludeNumbers.mol2")
2470
        r = RNAStructure("input/1xjr_clx_fChimera_noIncludeNumbers.mol2")
2471
        print(r.is_mol2())
2472
        r.mol2toPDB('/tmp/x.pdb')
2473
2474
        r = RNAStructure('/tmp/x.pdb')
2475
        print(r.get_report)
2476
        r.std_resn()
2477
        r.remove_hydrogen()
2478
        r.remove_ion()
2479
        r.remove_water()
2480
        r.fix_op_atoms()
2481
        r.renum_atoms()
2482
        r.fixU__to__U()
2483
        r.write("output/1xjr_clx_fChimera_noIncludeNumbers.mol2")
2484
2485
    if True:
2486
        r = RNAStructure("input/2du3_prot_bound.mol2")
2487
        print(r.is_mol2())
2488
        outfn = r.mol2toPDB()
2489
        print(r.get_report)
2490
2491
    print('================================================')
2492
    fn = "input/3e5fA-nogtp_processed_zephyr.pdb"
2493
    r = RNAStructure(fn)
2494
    print(r.is_mol2())
2495
    #outfn = r.mol2toPDB()
2496
    print(r.is_amber_like())
2497
    print(r.get_report)
2498
2499
    print(r.get_preview())
2500
2501
    r.std_resn()
2502
2503
    print(r.get_preview())
2504
2505
    r.remove_hydrogen()
2506
    r.remove_ion()
2507
    r.remove_water()
2508
    #renum_atoms(t, t)
2509
    #fix_O_in_UC(t, t)
2510
    #fix_op_atoms(t, t)
2511
    r.write('output/3e5fA-nogtp_processed_zephyr.pdb')
2512
2513
    print()
2514
    fn = "input/1xjr_clx_charmm.pdb"
2515
    print(fn)
2516
    s = RNAStructure(fn)
2517
    s.std_resn()
2518
    s.remove_hydrogen()
2519
    s.remove_ion()
2520
    s.remove_water()
2521
    s.write('output/1xjr_clx_charmm.pdb')
2522
2523
    #renum_atoms(t, t)
2524
    #fix_O_in_UC(t, t)
2525
    #fix_op_atoms(t, t)
2526
2527
    print()
2528
    fn = "input/dna_fconvpdb_charmm22.pdb"
2529
    print(fn)
2530
    r = RNAStructure(fn)
2531
    r.get_preview()
2532
    r.resn_as_dna()
2533
    r.remove_hydrogen()
2534
    r.remove_ion()
2535
    r.remove_water()
2536
    r.std_resn()
2537
    print(r.get_head())
2538
    print(r.get_tail())
2539
    print(r.get_preview())
2540
    r.write("output/dna_fconvpdb_charmm22.pdb")
2541
2542
    print()
2543
    fn = "input/1a9l_NMR_1_2_models.pdb"
2544
    print(fn)
2545
    r = RNAStructure(fn)
2546
    r.write("output/1a9l_NMR_1_2_models_lib.pdb")
2547
    # r.get_text() # get #1 model
2548
2549
    import doctest
2550
    doctest.testmod()
2551