Passed
Push — master ( 47ffa8...75424b )
by Marcin
02:43
created

build.rna_tools.rna_pdb_tools.radius_of_gyration()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
"""rna_pdb_tools - a swiss army knife to manipulation of RNA pdb structures
5
6
Usage::
7
8
   $ rna_pdb_tools.py --delete A:46-56 --inplace *.pdb
9
10
    $ rna_pdb_tools.py --get-seq *
11
    # BujnickiLab_RNApuzzle14_n01bound
12
    > A:1-61
13
    # BujnickiLab_RNApuzzle14_n02bound
14
    > A:1-61
15
    CGUUAGCCCAGGAAACUGGGCGGAAGUAAGGCCCAUUGCACUCCGGGCCUGAAGCAACGCG
16
    [...]
17
18
See `rna_pdb_merge_into_one.py` to merge PDB files in the order as you like into one NMR-like (multimodel) file
19
20
Examples::
21
22
    rna_pdb_tools.py --backbone-only   --get-rnapuzzle-ready  --inplace --suffix=bo examples/4GXY_min.pdb
23
24
To extract specific atoms for each residue and write them to separate PDB file (next to the input files, following syntax "<input>_<resid>.pdb")::
25
26
    rna_pdb_tools.py --rpr input/4GXY_min.pdb --save-single-res --ref-frame-only
27
28
Atoms presets::
29
30
  --backbone-only       used only with --get-rnapuzzle-ready, keep only backbone (= remove bases)
31
  --ref-frame-only      used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P
32
  --no-backbone         used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5')
33
  --bases-only          used only with --get-rnapuzzle-ready, keep only atoms of bases
34
      
35
.. image:: ../pngs/276411138-236435ff-2944-4bec-ab75-dca0d1e3aacf.jpg
36
37
38
-v is for verbose, --version for version ;)
39
"""
40
import argparse
41
import textwrap
42
import os
43
import shutil
44
import sys
45
import tempfile
46
import glob
47
import os
48
from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version, \
49
                          collapsed_view, fetch, fetch_ba, fetch_cif, replace_chain, RNAStructure, \
50
                          select_pdb_fragment, sort_strings, set_chain_for_struc
51
from rna_tools.tools.rna_x3dna.rna_x3dna import x3DNA
52
53
54
def get_parser():
55
    version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__)
56
    version = version[1].strip()
57
    parser = argparse.ArgumentParser(
58
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
59
60
    parser.add_argument('--version', help='', action='version', version=version)
61
62
    parser.add_argument('-r', '--report', help='get report',
63
                        action='store_true')
64
65
    parser.add_argument('--no-progress-bar', help='for --no-progress-bar for --rpr', action='store_true')
66
67
    parser.add_argument('--renum-atoms', help='renumber atoms, tested with --get-seq',
68
                        action='store_true')
69
70
    parser.add_argument('--renum-nmr', help='',
71
                        action='store_true')
72
73
    parser.add_argument('--renum-residues-dirty', help='',  action='store_true')
74
75
    parser.add_argument('--undo', help='undo operation of action done --inplace, , rename "backup files" .pdb~ to pdb, ALL files in the folder, not only ~ related to the last action (that you might want to revert, so be careful)',  action='store_true')
76
77
    parser.add_argument('--delete-anisou', help='remove files with ANISOU records, works with --inplace',
78
                        action='store_true')
79
80
    parser.add_argument('--remove0', help='remove atoms of X=0 position',
81
                        action='store_true')
82
83
    parser.add_argument('--fix', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms',
84
                        action='store_true')
85
86
    parser.add_argument('--to-mol2', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms',
87
                        action='store_true')
88
89
    parser.add_argument('--split-alt-locations', help='splits atoms, e.g. for alt locs A and B, it splits atoms two MODELS (all localizations A goes into MODEL1 and all localizations B goes into MODEL2',
90
                        action='store_true')
91
92
    parser.add_argument('-c', '--clean', help='get clean structure',
93
                        action='store_true')
94
95
    parser.add_argument('--is-pdb', help='check if a file is in the pdb format',
96
                        action='store_true')
97
98
    parser.add_argument('--is-nmr', help='check if a file is NMR-style multiple model pdb',
99
                        action='store_true')
100
101
    parser.add_argument('--nmr-dir', help='make NMR-style multiple model pdb file from a set of files \n\n' +
102
                        "  rna_pdb_tools.py --nmr-dir . 'cwc15_u5_fragments*.pdb' > ~/Desktop/cwc15-u5.pdb\n\n" +
103
                        "please use '' for pattern file recognition, this is a hack to deal with folders with\n"
104
                        "thousands of models, if you used only *.pdb then the terminal will complain that you\n"
105
                        "selected to many files.")
106
107
    parser.add_argument('--un-nmr', help="""split NMR-style multiple model pdb files into individual models [biopython],
108
109
   rna_pdb_tools.py  --un-nmr  split.pdb
110
   2
111
   /Users/magnus/Desktop/3hl2/split_1.pdb
112
   /Users/magnus/Desktop/3hl2/split_2.pdb
113
114
""",
115
                        action='store_true')
116
117
    parser.add_argument('--orgmode', help='get a structure in org-mode format <sick!>',
118
                        action='store_true')
119
120
    parser.add_argument('--get-chain', help='get chain, one or many, e.g, A, but now also ABC works')
121
122
    parser.add_argument('--fetch', action='store_true', help='fetch file from the PDB db, e.g., 1xjr,\nuse \'rp\' to fetch, fetch a given join, 4w90:C or 4w90_C'  +
123
                        'the RNA-Puzzles standardized_dataset [around 100 MB]')
124
125
    parser.add_argument('--fetch-cif',  action='store_true', help='')
126
127
    parser.add_argument('--fetch-ba', action='store_true',
128
                        help='fetch biological assembly from the PDB db')
129
130
    parser.add_argument('--fetch-chain', action='store_true', help='fetch a structure in extract chain, e.g. 6bk8 H')
131
132
    parser.add_argument('--fetch-fasta', action='store_true', help='fetch a fasta/sequence for given PDB ID, e.g. 6bk8')
133
134
    parser.add_argument('--get-seq', help='get seq', action='store_true')
135
136
    parser.add_argument('--rgyration', help='get seq', action='store_true')
137
138
    parser.add_argument('--color-seq', help='color seq, works with --get-seq', action='store_true')
139
140
    parser.add_argument('--ignore-files', help='files to be ingored, .e.g, \'solution\'')
141
    
142
    parser.add_argument('--compact',
143
                        help=textwrap.dedent("""with --get-seq, get it in compact view'
144
$ rna_pdb_tools.py --get-seq --compact *.pdb
145
# 20_Bujnicki_1
146
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
147
# 20_Bujnicki_2
148
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
149
# 20_Bujnicki_3
150
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
151
# 20_Bujnicki_4
152
153
"""), action='store_true')
154
155
    parser.add_argument('--hide-warnings', help='hide warnings, works with --get-chain, it hides warnings that given changes are not detected in a PDB file', action='store_true')
156
157
    parser.add_argument('--get-ss', help='get secondary structure', action='store_true')
158
159
    parser.add_argument('--rosetta2generic', help='convert ROSETTA-like format to a generic pdb',
160
                        action='store_true')
161
162
    parser.add_argument('--no-hr', help='do not insert the header into files',
163
                        action='store_true')
164
165
    parser.add_argument('--renumber-residues', help='by defult is false',
166
                        action='store_true')
167
168
    parser.add_argument('--dont-rename-chains',
169
                        help=textwrap.dedent("""used only with --get-rnapuzzle-ready.
170
By default:
171
   --get-rnapuzzle-ready rename chains from ABC.. to stop behavior switch on this option
172
"""),
173
                        action='store_true')
174
175
    parser.add_argument('--dont-fix-missing-atoms',
176
                        help="""used only with --get-rnapuzzle-ready""",
177
                        action='store_true')
178
179
    parser.add_argument('--inspect',
180
                        help="inspect missing atoms (technically decorator to --get-rnapuzzle-ready without actually doing anything but giving a report on problems)",
181
                        action='store_true')
182
    
183
    parser.add_argument('--collapsed-view', help='',
184
                        action='store_true')
185
186
    parser.add_argument('--cv', help='alias to collapsed_view',
187
                        action='store_true')
188
189
    parser.add_argument('-v', '--verbose', help='tell me more what you\'re doing, please!',
190
                        action='store_true')
191
192
    parser.add_argument('--mutate', help=textwrap.dedent("""mutate residues,
193
e.g.,
194
      --mutate "A:1a+2a+3a+4a,B:1a"
195
to mutate to adenines the first four nucleotides of the chain A
196
and the first nucleotide of the chain B"""))
197
198
    parser.add_argument('--edit',
199
                        dest="edit",
200
                        default='',
201
                        help="edit 'A:6>B:200', 'A:2-7>B:2-7'")
202
203
    parser.add_argument('--rename-chain',
204
                        help="edit 'A>B' to rename chain A to chain B")
205
206
    parser.add_argument('--swap-chains', help='B>A, rename A to _, then B to A, then _ to B')
207
208
    parser.add_argument('--set-chain', help='set chain for all ATOM lines and TER (quite brutal function)')
209
210
    parser.add_argument('--replace-chain',
211
                        default='',
212
                        help=textwrap.dedent("""a file PDB name with one chain that will be used to
213
replace the chain in the original PDB file,
214
the chain id in this file has to be the same with the chain id of the original chain"""))
215
216
    parser.add_argument('--delete',  # type="string",
217
                        dest="delete",
218
                        default='',
219
                        help="delete the selected fragment, e.g. A:10-16, or for more than one fragment --delete 'A:1-25+30-57'")
220
221
    parser.add_argument('--extract',  # type="string",
222
                        dest="extract",
223
                        default='',
224
                        help="extract the selected fragment, e.g. A:10-16, or for more than one fragment --extract 'A:1-25+30-57', or even 'A:1-25+B:30-57'")
225
226
    parser.add_argument('--extract-chain',
227
                         help="extract chain, e.g. A")
228
229
    parser.add_argument('--uniq', help=textwrap.dedent("""
230
rna_pdb_tools.py --get-seq --uniq '[:5]' --compact --chain-first * | sort
231
A:1-121        ACCUUGCGCAACUGGCGAAUCCUGGGGCUGCCGCCGGCAGUACCC...CA # rp13nc3295_min.out.1
232
A:1-123        ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # rp13cp0016_min.out.1
233
A:1-123        ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # zcp_6537608a_ALL-000001_AA
234
A:1-45 57-71   GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCGCCGCCCGCCUGGGC # solution
235
"""))
236
237
    parser.add_argument('--chain-first', help="", action='store_true')
238
    parser.add_argument('--oneline', help="", action='store_true')
239
240
    parser.add_argument('--replace-htm', help="", action='store_true')
241
242
    parser.add_argument('--fasta',
243
                        help= textwrap.dedent("""with --get-seq, show sequences in fasta format,
244
can be combined with --compact (mind, chains will be separated with ' ' in one line)
245
246
$ rna_pdb_tools.py --get-seq --fasta --compact input/20_Bujnicki_1.pdb
247
> 20_Bujnicki_1
248
ACCCGCAAGGCCGACGGC GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
249
250
"""), action='store_true')
251
252
    parser.add_argument('--cif2pdb', help="convert cif to PDB, fancy way", action='store_true')
253
    parser.add_argument('--pdb2cif', help="[PyMOL Python package required]", action='store_true')
254
255
    parser.add_argument('--mdr', help='get structures ready for MD (like rpr but without first)',
256
                        action='store_true')
257
258
    x = parser.add_argument_group('RNAPUZZLE-READY')
259
    x.add_argument('--get-rnapuzzle-ready',
260
                        help=textwrap.dedent("""get RNApuzzle ready (keep only standard atoms).'
261
Be default it does not renumber residues, use --renumber-residues
262
[requires BioPython]"""), action='store_true')
263
264
    x.add_argument('--rpr', help='alias to get_rnapuzzle ready)',
265
                        action='store_true')
266
267
268
    rpr = parser.add_argument_group('CAN BE COMBINED WITH')# --get-rnapuzzle-ready (or --rpr) can be combined with')
269
270
    rpr.add_argument('--keep-hetatm', help='keep hetatoms', action='store_true')
271
272
    rpr.add_argument('--inplace', help=textwrap.dedent("""in place edit the file! [experimental,
273
only for get_rnapuzzle_ready, --delete, --get-ss, --get-seq, --edit-pdb]"""),
274
                        action='store_true')
275
276
    rpr.add_argument('--here', help=textwrap.dedent("""save a file next to the original file with auto suffix
277
for --extract it's .extr.pdb"""),
278
                        action='store_true')
279
280
    rpr.add_argument('--suffix', help=textwrap.dedent("""when used with --inplace allows you to change a name of a new file, --suffix del will give <file>_del.pdb (mind added _)"""))
281
282
    rpr.add_argument('--replace-hetatm', help="replace 'HETATM' with 'ATOM' [tested only with --get-rnapuzzle-ready]",
283
                        action="store_true")
284
285
    rpr.add_argument('--dont-report-missing-atoms',
286
                        help="""used only with --get-rnapuzzle-ready""",
287
                        action='store_true')
288
289
    rpr.add_argument('--backbone-only',
290
                        help="""used only with --get-rnapuzzle-ready, keep only backbone (= remove bases)""",
291
                        action='store_true')
292
293
    rpr.add_argument('--p-only',
294
                        help="""used only with --get-rnapuzzle-ready, keep p backbone (= remove bases)""",
295
                        action='store_true')
296
297
    rpr.add_argument('--ref-frame-only',
298
                        help="""used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P""",
299
                        action='store_true')
300
301
    rpr.add_argument('--no-backbone',
302
                        help="""used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5')""",
303
                        action='store_true')
304
305
    rpr.add_argument('--bases-only',
306
                        help="""used only with --get-rnapuzzle-ready, keep only atoms of bases""",
307
                        action='store_true')
308
309
    rpr.add_argument('--save-single-res',
310
                        help="""used only with --get-rnapuzzle-ready, for each residue create a new pdb output file, you can combine it with --bases-only etc.""",
311
                        action='store_true')
312
313
    parser.add_argument('file', help='file', nargs='+')
314
    #parser.add_argument('outfile', help='outfile')
315
    return parser
316
317
318
# main
319
if __name__ == '__main__':
320
    # get version
321
    version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__)
322
    version = version[1].strip()
323
324
    # get parser and arguments
325
    parser = get_parser()
326
327
    args = parser.parse_args()
328
329
    # quick fix for one files vs file-s
330
    if list == type(args.file) and len(args.file) == 1:
331
        args.file = args.file[0]
332
333
    if args.report:
334
        s = RNAStructure(args.file)
335
        print(s.get_report)
336
        print(s.get_preview())
337
        print(s.get_info_chains())
338
339
    if args.clean:
340
        s = RNAStructure(args.file)
341
        s.decap_gtp()
342
        s.std_resn()
343
        s.remove_hydrogen()
344
        s.remove_ion()
345
        s.remove_water()
346
        s.renum_atoms()
347
        s.fix_O_in_UC()
348
        s.fix_op_atoms()
349
        # print s.get_preview()
350
        # s.write(args.outfile)
351
        if not args.no_hr:
352
            print(add_header(version))
353
        print(s.get_text())
354
355
    if args.get_seq:
356
        # quick fix - make a list on the spot
357
        if list != type(args.file):
358
            args.file = [args.file]
359
        ##################################
360
        analyzed = []
361
        for f in args.file:
362
            #####################################
363
            if args.uniq:
364
                subname = eval('f' + args.uniq)
365
                if subname in analyzed:
366
                    continue
367
                else:
368
                    analyzed.append(subname)
369
            ########
370
            s = RNAStructure(f)
371
            if not s.is_pdb():
372
                print('Error: Not a PDB file %s' % f)
373
                sys.exit(1)
374
            s.decap_gtp()
375
            s.std_resn()
376
            s.remove_hydrogen()
377
            s.remove_ion()
378
            s.remove_water()
379
            if args.renum_atoms:
380
                s.renum_atoms()
381
            s.fix_O_in_UC()
382
            s.fix_op_atoms()
383
384
            output = ''
385
386
            # with # is easier to grep this out
387
            if args.fasta:
388
                # s.fn vs s.name
389
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n'
390
            elif args.oneline:
391
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n'
392
            else:
393
                output += '# ' + os.path.basename(f.replace('.pdb', '')) + '\n'
394
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq) + '\n'
395
396
            try:
397
                sys.stdout.write(output)
398
                sys.stdout.flush()
399
            except IOError:
400
                pass
401
402
    if args.get_ss:
403
        # quick fix - make a list on the spot
404
        if list != type(args.file):
405
            args.file = [args.file]
406
        ##################################
407
        for f in args.file:
408
            output = f + '\n'
409
            p = x3DNA(f)
410
            output += p.get_secstruc() + '\n'
411
            try:
412
                sys.stdout.write(output)
413
                sys.stdout.flush()
414
            except IOError:
415
                pass
416
417
    # getchain
418
    if args.get_chain:
419
        s = RNAStructure(args.file)
420
        ## s.std_resn()
421
        ## s.remove_hydrogen()
422
        ## s.remove_ion()
423
        ## s.remove_water()
424
        ## s.renum_atoms()
425
        ## s.fix_O_in_UC()
426
        ## s.fix_op_atoms()
427
        # print s.get_preview()
428
        warningmsg = ''
429
        for chain in list(args.get_chain):
430
            chain_txt = s.get_chain(chain)
431
            if not chain_txt.strip():
432
                warningmsg += 'Warning: Chain %s not detected!' % chain
433
            else:
434
                print(chain_txt)
435
        if not args.hide_warnings:
436
            if warningmsg:
437
                print(warningmsg)
438
439
    if args.rosetta2generic:
440
        s = RNAStructure(args.file)
441
        s.std_resn()
442
        s.remove_hydrogen()
443
        s.remove_ion()
444
        s.remove_water()
445
        s.fix_op_atoms()
446
        s.renum_atoms()
447
        # print s.get_preview()
448
        # s.write(args.outfile)
449
        if not args.no_hr:
450
            print(add_header(version))
451
        print(s.get_text())
452
453
    remarks_only = False
454
    if args.inspect:
455
        args.get_rnapuzzle_ready = True
456
        fix_missing_atom = False
457
        remarks_only = True
458
        args.dont_rename_chains = True
459
        args.renumber_residues = False
460
461
    if args.get_rnapuzzle_ready or args.rpr or args.mdr:  # rnapuzzle
462
        # quick fix - make a list on the spot
463
        if list != type(args.file):
464
            args.file = [args.file]
465
        ##################################
466
        # progress bar only in --inplace mode!
467
        if args.inplace:
468
            if not args.no_progress_bar:
469
                import progressbar
470
                bar = progressbar.ProgressBar(max_value=len(args.file))
471
                bar.update(0)
472
473
        for c, f in enumerate(args.file):
474
            if args.verbose:
475
                print(f)
476
            if args.inplace:
477
                shutil.copy(f, f + '~')
478
479
            # keep previous edits
480
            previous_edits = []
481
            with open(f) as fx:
482
                for l in fx:
483
                    if l.startswith('HEADER --'):
484
                        previous_edits.append(l.strip())
485
            ######################
486
487
            s = RNAStructure(f)
488
            if args.replace_hetatm:
489
                s.replace_hetatms()
490
491
            s.remove_hydrogen()
492
            s.decap_gtp()
493
            s.std_resn()
494
            s.fix_op_atoms()
495
496
            s.remove_ion()
497
            s.remove_water()
498
            # s.renum_atoms()
499
500
            s.shift_atom_names()
501
            s.prune_elements()
502
503
            # s.write('tmp.pdb')
504
            
505
            rename_chains = False if args.dont_rename_chains else True
506
507
            report_missing_atoms = not args.dont_report_missing_atoms
508
            fix_missing_atom = not args.dont_fix_missing_atoms
509
510
            ignore_op3 = False
511
            if args.mdr:
512
                ignore_op3 = True
513
                
514
            remarks = s.get_rnapuzzle_ready(args.renumber_residues, fix_missing_atoms=fix_missing_atom,
515
                                            rename_chains=rename_chains,
516
                                            report_missing_atoms=report_missing_atoms,
517
                                            backbone_only=args.backbone_only,
518
                                            no_backbone=args.no_backbone,
519
                                            bases_only=args.bases_only,
520
                                            p_only=args.p_only,
521
                                            keep_hetatm=args.keep_hetatm,
522
                                            ignore_op3=ignore_op3,
523
                                            save_single_res=args.save_single_res,
524
                                            ref_frame_only = args.ref_frame_only,
525
                                            verbose=args.verbose)
526
527
            if args.inplace:
528
                if args.suffix:
529
                    f = f.replace('.pdb', '_' + args.suffix + '.pdb')
530
                    if args.verbose: print(f)
531
                        
532
                with open(f, 'w') as f:
533
                    if not args.no_hr:
534
                        f.write(add_header(version) + '\n')
535
                    if previous_edits:
536
                        f.write('\n'.join(previous_edits) + '\n')
537
                    if remarks:
538
                        f.write('\n'.join(remarks) + '\n')
539
                    f.write(s.get_text())
540
                # progress bar only in --inplace mode!
541
                if not args.no_progress_bar:
542
                   bar.update(c)
0 ignored issues
show
introduced by
The variable bar does not seem to be defined for all execution paths.
Loading history...
543
544
            else:
545
                output = ''
546
                if not args.no_hr:
547
                    output += add_header(version) + '\n'
548
                if remarks:
549
                    output += '\n'.join(remarks) + '\n'
550
                output += s.get_text() + '\n'
551
552
                if remarks_only:
553
                    sys.stdout.write('\n'.join(remarks))
554
                    sys.stdout.flush()
555
                else:
556
                    if args.here:
557
                        if '_rpr' not in f:  # good idea?
558
                            nf = f.replace('.pdb', '_rpr.pdb')
559
                            with open(nf, 'w') as fio:
560
                                print(nf)
561
                                fio.write(output)
562
                    else:
563
                        try:
564
                            sys.stdout.write(output)
565
                            sys.stdout.flush()
566
                        except IOError:
567
                            pass
568
        # hmm... fix for problem with renumbering, i do renumbering
569
        # and i stop here
570
        # i'm not sure that this is perfect
571
        sys.exit(0)
572
573
574
    if args.renumber_residues:
575
        s = RNAStructure(args.file)
576
        s.remove_hydrogen()
577
        s.remove_ion()
578
        s.remove_water()
579
        s.get_rnapuzzle_ready(args.renumber_residues)
580
        s.renum_atoms()
581
        if not args.no_hr:
582
            print(add_header(version))
583
        print(s.get_text())
584
585
# args.renumber_resides_dirty
586
    if args.renum_residues_dirty:
587
        # quick fix - make a list on the spot
588
        if list != type(args.file):
589
            args.file = [args.file]
590
        ##################################
591
        for f in args.file:
592
            if args.inplace:
593
                shutil.copy(f, f + '~')
594
595
            s = RNAStructure(f)
596
597
            output = ''
598
            #if not args.no_hr:
599
            #    output += add_header(version) + '\n'
600
            #    output += 'HEADER --delete ' + args.delete + '\n'  # ' '.join(str(selection))
601
            c = 1
602
            old_resi = -1
603
            for l in s.lines:
604
                if l.startswith('ATOM') or l.startswith("HETATOM"):
605
                    resi = int(l[23:26].strip())
606
                    if resi != old_resi:
607
                        old_resi = resi
608
                        c += 1
609
                    # print(resi, c)
610
                    #resi = c
611
                    #if chain in selection:
612
                    #    if resi in selection[chain]:
613
                    #        continue  # print chain, resi
614
                    output += l[:23] + str(c).rjust(3) + l[26:] + '\n'
615
            # write: inplace
616
            if args.inplace:
617
                with open(f, 'w') as f:
618
                    f.write(output)
619
            else:  # write: to stdout
620
                try:
621
                    sys.stdout.write(output)
622
                    sys.stdout.flush()
623
                except IOError:
624
                    pass
625
626
627
    if args.undo:
628
        # quick fix - make a list on the spot
629
        dir = args.file #os.path.abso(os.path.dirname(args.file))
630
        for f in glob.glob(dir + '/*.pdb~'):
631
            if args.verbose:
632
                print(f, '->',f.replace('.pdb~', '.pdb'))
633
            os.rename(f, f.replace('.pdb~', '.pdb'))
634
635
    if args.delete:
636
        # quick fix - make a list on the spot
637
        if list != type(args.file):
638
            args.file = [args.file]
639
        ##################################
640
        for f in args.file:
641
            if args.inplace:
642
                shutil.copy(f, f + '~')
643
644
            selection = select_pdb_fragment(args.delete)
645
            s = RNAStructure(f)
646
647
            output = ''
648
            if not args.no_hr:
649
                output += add_header(version) + '\n'
650
                output += 'HEADER --delete ' + args.delete + '\n'  # ' '.join(str(selection))
651
            for l in s.lines:
652
                if l.startswith('ATOM'):
653
                    chain = l[21]
654
                    resi = int(l[23:26].strip())
655
                    if chain in selection:
656
                        if resi in selection[chain]:
657
                            continue  # print chain, resi
658
                    output += l + '\n'
659
660
            # write: inplace
661
            if args.inplace:
662
                os.rename(f, f.replace('.pdb', '.pdb~'))                
663
                if args.suffix:
664
                    f = f.replace('.pdb', '_' + args.suffix + '.pdb')
665
                with open(f, 'w') as f:
666
                    f.write(output)
667
            else:  # write: to stdout
668
                try:
669
                    sys.stdout.write(output)
670
                    sys.stdout.flush()
671
                except IOError:
672
                    pass
673
674
    if args.replace_chain:
675
        # quick fix - make a list on the spot
676
        if list != type(args.file):
677
            args.file = [args.file]
678
        ##################################
679
        for f in args.file:
680
            if args.inplace:
681
                shutil.copy(f, f + '~')
682
683
            # --replace_chain <file> without A:<file> it will be easier than --x "A:<file>"
684
            s = RNAStructure(args.replace_chain)
685
            chain_ids = (s.get_all_chain_ids())
686
            if len(chain_ids) > 1:
687
                raise Exception('There is more than one chain in the inserted PDB file. There should be only one chain, the one you want to insert to the PDB.')
688
            out = replace_chain(f, args.replace_chain, list(chain_ids)[0])
689
            print(out)
690
691
    if args.mutate:
692
        # quick fix - make a list on the spot
693
        if list != type(args.file):
694
            args.file = [args.file]
695
        ##################################
696
        from rna_tools.tools.mini_moderna3.moderna import *
697
698
        for f in args.file:
699
            if args.ignore_files:
700
                if args.ignore_files in f:
701
                    continue
702
                
703
            if args.inplace:
704
                shutil.copy(f, f + '~')  # create a backup copy if inplace
705
706
            # create a working copy of the main file
707
            ftf = tempfile.NamedTemporaryFile(delete=False).name  # f temp file
708
            shutil.copy(f, ftf)  # create a backup copy if inplace
709
710
            # go over each chain
711
            # rna_pdb_tools.py --mutate 'A:1CB:1G,A:1U+B:1A' CG_AB.pdb > ~/Desktop/a.pdb
712
            args.mutate = args.mutate.upper()
713
            for m in args.mutate.split(','):  # A:1A, B:1A
714
                chain, resi_mutate_to = m.strip().split(':')  # A:1A+2C
715
                resi_mutate_to_list = resi_mutate_to.split('+')  # A:1A+2C
716
717
                model = load_model(f, chain)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable load_model does not seem to be defined.
Loading history...
718
                # go over each mutation in this chain
719
                for resi_mutate_to in resi_mutate_to_list:
720
                    resi = resi_mutate_to[:-1]
721
                    mutate_to = resi_mutate_to[-1]
722
                    if args.verbose:
723
                        print('Mutate', model[resi], 'to', mutate_to)
724
                    exchange_single_base(model[resi], mutate_to)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable exchange_single_base does not seem to be defined.
Loading history...
725
726
                # multi mutation in one chain
727
                tf = tempfile.NamedTemporaryFile(delete=False)
728
                model.write_pdb_file(tf.name)
729
730
                # work on the copy of f, ftf
731
                output = replace_chain(ftf, tf.name, chain)
732
                with open(ftf, 'w') as tmp:
733
                    tmp.write(output)
734
735
            # write: inplace
736
            if args.inplace:
737
                # ftf now is f, get ready for the final output
738
                if args.suffix:
739
                    f = f.replace('.pdb', '_' + args.suffix + '.pdb')
740
                # rpr on the file
741
                shutil.copy(ftf, f)
742
                os.system('rna_pdb_tools.py --rpr --no-progress-bar --inplace ' + f)
743
            else:  # write: to stdout
744
                try:
745
                    sys.stdout.write(output)
0 ignored issues
show
introduced by
The variable output does not seem to be defined for all execution paths.
Loading history...
746
                    sys.stdout.flush()
747
                except IOError:
748
                    pass
749
750
751
# extract
752
    if args.extract:
753
        # quick fix - make a list on the spot
754
        if list != type(args.file):
755
            args.file = [args.file]
756
        ##################################
757
        for f in args.file:
758
            if args.inplace:
759
                shutil.copy(f, f + '~')
760
761
            selection = select_pdb_fragment(args.extract)
762
            s = RNAStructure(f)
763
764
            output = ''
765
            if not args.no_hr:
766
                output += add_header(version) + '\n'
767
                output += 'HEADER extract ' + args.extract + '\n'  # ' '.join(str(selection))
768
            for l in s.lines:
769
                if l.startswith('ATOM'):
770
                    chain = l[21]
771
                    resi = int(l[23:26].strip())
772
                    if chain in selection:
773
                        if resi in selection[chain]:
774
                            # continue  # print chain, resi
775
                            output += l + '\n'
776
777
            # write: inplace
778
            if args.inplace:
779
                with open(f, 'w') as f:
780
                    f.write(output)
781
            elif args.here:
782
                if '_extr' not in f:  # good idea?
783
                    nf = f.replace('.pdb', '_extr.pdb')
784
                    with open(nf, 'w') as fio:
785
                        print(nf)
786
                        fio.write(output)
787
            else:  # write: to stdout
788
                try:
789
                    sys.stdout.write(output)
790
                    sys.stdout.flush()
791
                except IOError:
792
                    pass
793
794
795
    if args.extract_chain:
796
        # quick fix - make a list on the spot
797
        if list != type(args.file):
798
            args.file = [args.file]
799
        ##################################
800
        for f in args.file:
801
            if args.inplace:
802
                shutil.copy(f, f + '~')
803
804
            selection = select_pdb_fragment(args.extract)
805
            s = RNAStructure(f)
806
807
            output = ''
808
            if not args.no_hr:
809
                output += add_header(version) + '\n'
810
                output += 'HEADER extract ' + args.extract + '\n'  # ' '.join(str(selection))
811
            for l in s.lines:
812
                if l.startswith('ATOM') or l.startswith('TER') or l.startswith('HETATM'):
813
                    chain = l[21]
814
                    if chain in args.extract_chain:
815
                        output += l + '\n'
816
817
            # write: inplace
818
            if args.inplace:
819
                with open(f, 'w') as f:
820
                    f.write(output)
821
            else:  # write: to stdout
822
                try:
823
                    sys.stdout.write(output)
824
                    sys.stdout.flush()
825
                except IOError:
826
                    pass
827
828
    if args.is_pdb:
829
        s = RNAStructure(args.file)
830
        output = str(s.is_pdb())
831
        sys.stdout.write(output + '\n')
832
833
    if args.un_nmr:
834
        s = RNAStructure(args.file)
835
        str(s.un_nmr())
836
837
    if args.is_nmr:
838
        struc = RNAStructure(args.file)
839
        output = str(struc.is_nmr(args.verbose))
840
        sys.stdout.write(output + '\n')
841
842
    #edit
843
    if args.edit:
844
        if list != type(args.file):
845
            args.file = [args.file]
846
        ##################################
847
        for f in args.file:
848
            if args.verbose:
849
                print(f)
850
            if args.inplace:
851
                shutil.copy(f, f + '~')
852
853
            output = edit_pdb(f, args)
854
855
            if args.inplace:
856
                with open(f, 'w') as f:
857
                    f.write(output)
858
            else:  # write: to stdout
859
                try:
860
                    sys.stdout.write(output)
861
                    sys.stdout.flush()
862
                except IOError:
863
                    pass
864
865
866
    if args.fetch:
867
        fn = fetch(args.file)
868
            
869
    if args.fetch_fasta:
870
        pdb_id = args.file
871
        pdb_id = pdb_id.replace('.pdb', '')
872
873
        import urllib3
874
        http = urllib3.PoolManager()
875
876
        response = http.request('GET', 'https://www.rcsb.org/fasta/entry/' + pdb_id.upper())
877
        if not response.status == 200:
878
            raise PDBFetchError()
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable PDBFetchError does not seem to be defined.
Loading history...
879
        else:
880
            txt = response.data.decode("utf-8").strip()
881
            print(txt)            
882
            if 'No valid PDB IDs were submitted' in txt:
883
                pass
884
            else:
885
                with open(pdb_id + '.fa', 'w') as f:
886
                    f.write(txt)
887
888
        
889
    if args.fetch_chain:
890
        fn = fetch(args.file[0])
891
        chain = args.file[1]
892
        nfn = fn.replace('.pdb','') + '_' + chain + '.pdb' # 6bk8_H.pdb
893
        #cmd = 'rna_pdb_tools.py --get-chain %s %s' > %s' % (chain, fn, nfn)
894
        cmd = 'rna_pdb_tools.py --get-chain %s %s' % (chain, fn)
895
        os.system(cmd)
896
        # print(nfn)
897
898
    if args.fetch_ba:
899
        fetch_ba(args.file)
900
901
    if args.fetch_cif:
902
        if list != type(args.file):
903
            args.file = [args.file]
904
        ##################################
905
        for f in args.file:
906
            fetch_cif(f)
907
908
    if args.collapsed_view or args.cv:
909
        collapsed_view(args)
910
911
    if args.delete_anisou:
912
        # quick fix - make a list on the spot
913
        if list != type(args.file):
914
            args.file = [args.file]
915
        ##################################
916
        for f in args.file:
917
            if args.inplace:
918
                shutil.copy(f, f + '~')
919
920
            s = RNAStructure(f)
921
922
            output = ''
923
            if not args.no_hr:
924
                output += add_header(version) + '\n'
925
926
            for l in s.lines:
927
                if l.startswith('ANISOU'):
928
                    continue
929
                else:
930
                    output += l + '\n'
931
932
            if args.inplace:
933
                with open(f, 'w') as f:
934
                    f.write(output)
935
            else:  # write: to stdout
936
                try:
937
                    sys.stdout.write(output)
938
                    sys.stdout.flush()
939
                except IOError:
940
                    pass
941
942
943
    if args.swap_chains:
944
        # quick fix - make a list on the spot
945
        if list != type(args.file):
946
            args.file = [args.file]
947
        ##################################
948
        for f in args.file:
949
            if args.inplace:
950
                shutil.copy(f, f + '~')
951
            # rename_chain 'A>B'
952
            s = RNAStructure(f)
953
            output = ''
954
            if not args.no_hr:
955
                output += add_header(version) + '\n'
956
957
            chain_id_old, chain_id_new = args.swap_chains.split('>')
958
            if chain_id_old == chain_id_new:
959
                output = open(f).read() # return the file ;-) itself unchanged
960
            else:
961
                s.rename_chain(chain_id_new, '_')
962
                s.rename_chain(chain_id_old, chain_id_new)
963
                output += s.rename_chain('_', chain_id_old)
964
965
            if args.inplace:
966
                with open(f, 'w') as f:
967
                    f.write(output)
968
            else:  # write: to stdout
969
                try:
970
                    sys.stdout.write(output)
971
                    sys.stdout.flush()
972
                except IOError:
973
                    pass
974
975
    if args.rename_chain:
976
        # quick fix - make a list on the spot
977
        if list != type(args.file):
978
            args.file = [args.file]
979
        ##################################
980
        for f in args.file:
981
            if args.inplace:
982
                shutil.copy(f, f + '~')
983
            # rename_chain 'A>B'
984
            s = RNAStructure(f)
985
            chain_id_old, chain_id_new = args.rename_chain.split('>')
986
            output = ''
987
            if not args.no_hr:
988
                output += add_header(version) + '\n'
989
            output += s.rename_chain(chain_id_old, chain_id_new)
990
            if args.inplace:
991
                with open(f, 'w') as f:
992
                    f.write(output)
993
            else:  # write: to stdout
994
                try:
995
                    sys.stdout.write(output)
996
                    sys.stdout.flush()
997
                except IOError:
998
                    pass
999
1000
1001
    if args.split_alt_locations:
1002
        # quick fix - make a list on the spot
1003
        if list != type(args.file):
1004
            args.file = [args.file]
1005
        ##################################
1006
        for f in args.file:
1007
            if args.inplace:
1008
                shutil.copy(f, f + '~')
1009
1010
            #s = RNAStructure(f)
1011
            s = open(f)
1012
            output = ''
1013
            if not args.no_hr:
1014
                output += add_header(version) + '\n'
1015
1016
            # First, collect all alternative locations.
1017
            alts = set()
1018
            for l in s:
1019
                if l.startswith('ATOM'):
1020
                    if l[16].strip():
1021
                        alts.add(l[16])
1022
            s.close()
1023
1024
            if args.verbose:
1025
                print('alts:', alts)
1026
1027
            for index, alt in enumerate(alts):
1028
                output += 'MODEL %i' % (index + 1)
1029
                s = open(f)
1030
                for l in s:
1031
                    if l.startswith('ATOM') or l.startswith('HETATM'):
1032
                        # if empty, then print this line
1033
                        if l[16] == ' ':
1034
                            output += l
1035
                        if l[16] == alt:
1036
                            output += l
1037
                    else:
1038
                        output += l
1039
                output += 'ENDMDL\n'
1040
                s.close()
1041
1042
            if args.inplace:
1043
                with open(f, 'w') as f:
1044
                    f.write(output)
1045
            else:  # write: to stdout
1046
                try:
1047
                    sys.stdout.write(output)
1048
                    sys.stdout.flush()
1049
                except IOError:
1050
                    pass
1051
1052
    if args.orgmode:
1053
        if args.inplace:
1054
            shutil.copy(args.file, args.file + '~')
1055
        s = RNAStructure(args.file)
1056
        s.decap_gtp()
1057
        s.std_resn()
1058
        s.remove_hydrogen()
1059
        s.remove_ion()
1060
        s.remove_water()
1061
        s.fix_op_atoms()
1062
        s.renum_atoms()
1063
        s.shift_atom_names()
1064
        s.prune_elements()
1065
        # print s.get_preview()
1066
        # s.write(args.outfile)
1067
        # for l in s.lines:
1068
        #    print l
1069
1070
        remarks = s.get_rnapuzzle_ready(
1071
            args.renumber_residues, fix_missing_atoms=True, rename_chains=True, verbose=args.verbose)
1072
1073
        with open(args.file + '~', 'w') as f:
1074
            if not args.no_hr:
1075
                f.write(add_header(version) + '\n')
1076
1077
            f.write('\n'.join(remarks) + '\n')
1078
            f.write(s.get_text())
1079
1080
        try:
1081
            from Bio import PDB
1082
            from Bio.PDB import PDBIO
1083
            import warnings
1084
            warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
1085
            warnings.filterwarnings('ignore', '.*with given element *',)
1086
        except:
1087
            sys.exit('Error: Install biopython to use this function (pip biopython)')
1088
1089
        parser = PDB.PDBParser()
1090
        struct = parser.get_structure('', args.file + '~')
1091
        model = struct[0]
1092
1093
        # chains [['A', 'seq', [residues]]]
1094
        chains = []
1095
        for c in model.get_list():
1096
            seq = ''
1097
            chain = []
1098
            for r in c:
1099
                chain.append(str(r.get_resname().strip()) + str(r.id[1]))
1100
                seq += r.get_resname().strip()
1101
            chains.append([c.id, seq, chain])
1102
1103
        t = []
1104
        #[['A', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']], ['B', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']]]
1105
        for c in chains:
1106
            t.append('* ' + c[0] + ':' + c[2][0][1:] + '-' + c[2][-1][1:] + ' ' + c[1])
1107
            for r in c[2]:
1108
                t.append('** ' + c[0] + ':' + r)
1109
        print('\n'.join(t))
1110
1111
    if args.fix:
1112
        # quick fix - make a list on the spot
1113
        if list != type(args.file):
1114
            args.file = [args.file]
1115
        ##################################
1116
        for f in args.file:
1117
            cmd = 'pdbfixer ' + f + ' --add-atoms all --add-residues'
1118
            print(cmd)
1119
            os.system(cmd)
1120
            if args.inplace:
1121
                shutil.move("output.pdb", f)
1122
            else:
1123
                shutil.move("output.pdb", f.replace('.pdb', '_fx.pdb'))
1124
                            
1125
    if args.to_mol2:
1126
        # quick fix - make a list on the spot
1127
        if list != type(args.file):
1128
            args.file = [args.file]
1129
        ##################################
1130
        for f in args.file:
1131
            cmd = 'obabel -i pdb ' + f + ' -o mol2 -O ' + f.replace('.pdb', '.mol2')
1132
            print(cmd)
1133
            os.system(cmd)
1134
1135
1136
    if args.nmr_dir:
1137
        files = sort_strings(glob.glob(args.nmr_dir + '/' + args.file))
1138
1139
        c = 1
1140
1141
        for f in files:
1142
            #s = RNAStructure(f)
1143
            print("MODEL        " + str(c))
1144
            # at some point I could use RNAStructure for this
1145
            print(open(f).read())
1146
            #print(s.get_text(add_end=False))
1147
            print('ENDMDL')
1148
            c += 1
1149
        print('END')
1150
1151
    if args.set_chain:
1152
        if list != type(args.file):
1153
            args.file = [args.file]
1154
1155
        for f in args.file:
1156
            txt = set_chain_for_struc(f, args.set_chain)
1157
            if args.inplace:
1158
                shutil.move(f, f.replace('.pdb', '.pdb~'))
1159
                with open(f, 'w') as fi:
1160
                    fi.write(txt)
1161
            else:
1162
                print(txt)
1163
1164
    if args.remove0:
1165
        if list != type(args.file):
1166
            args.file = [args.file]
1167
1168
        for f in args.file:
1169
            s = RNAStructure(f)
1170
            txt = ''
1171
            for l in s.lines:
1172
                if s.get_atom_coords(l) == (0.0, 0.0, 0.0):
1173
                    continue
1174
                else:
1175
                    txt += l + '\n'
1176
1177
            if args.inplace:
1178
                shutil.move(f, f.replace('.pdb', '.pdb~'))
1179
                with open(f, 'w') as fi:
1180
                    fi.write(txt)
1181
            else:
1182
                print(txt)
1183
1184
    if args.replace_htm:
1185
        if list != type(args.file):
1186
            args.file = [args.file]
1187
1188
        for f in args.file:
1189
            with open(f) as fn:
1190
                txt = fn.read()
1191
            txt = txt.replace('HETATM', 'ATOM  ')
1192
            if args.inplace:
1193
                shutil.move(f, f.replace('.pdb', '.pdb~'))
1194
                with open(f, 'w') as fi:
1195
                    fi.write(txt)
1196
            else:
1197
                print(txt)
1198
1199
    if args.rgyration:
1200
        if list != type(args.file):
1201
            args.file = [args.file]
1202
        # quick fix - make a list on the spot
1203
        if list != type(args.file):
1204
            args.file = [args.file]
1205
        ##################################
1206
        analyzed = []
1207
        for f in args.file:
1208
            #####################################
1209
            if args.uniq:
1210
                subname = eval('f' + args.uniq)
1211
                if subname in analyzed:
1212
                    continue
1213
                else:
1214
                    analyzed.append(subname)
1215
1216
            s = RNAStructure(f)
1217
            if not s.is_pdb():
1218
                print('Error: Not a PDB file %s' % f)
1219
                sys.exit(1)
1220
            s.decap_gtp()
1221
            s.std_resn()
1222
            s.remove_hydrogen()
1223
            s.remove_ion()
1224
            s.remove_water()
1225
            if args.renum_atoms:
1226
                s.renum_atoms()
1227
            s.fix_O_in_UC()
1228
            s.fix_op_atoms()
1229
1230
            output = ''
1231
1232
            # with # is easier to grep this out
1233
            if args.fasta:
1234
                # s.fn vs s.name
1235
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n'
1236
            elif args.oneline:
1237
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n'
1238
            else:
1239
                N = len(s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq))
1240
                output += '' + os.path.basename(f.replace('.pdb', '')).ljust(60) + ' seq len: ' + str(N) + ' '
1241
            try:
1242
                sys.stdout.write(output)
1243
                sys.stdout.flush()
1244
            except IOError:
1245
                pass
1246
1247
            import math
1248
            def calc_rg():
1249
                    """modified after https://github.com/sarisabban/Rg"""
1250
                    coord = list()
1251
                    mass = list()
1252
                    for line in s.lines:
1253
                            try:
1254
                                    line = line.split()
1255
                                    x = float(line[6])
1256
                                    y = float(line[7])
1257
                                    z = float(line[8])
1258
                                    coord.append([x, y, z])
1259
                                    atom = line[2][0]
1260
                                    if atom == 'C':
1261
                                            mass.append(12.0107)
1262
                                    elif atom == 'O':
1263
                                            mass.append(15.9994)
1264
                                    elif atom == 'N':
1265
                                            mass.append(14.0067)
1266
                                    elif atom == 'S':
1267
                                            mass.append(32.065)
1268
                                    elif atom == 'P':
1269
                                            mass.append(30.97)
1270
                            except:
1271
                                    pass
1272
1273
                    xm = [(m*i, m*j, m*k) for (i, j, k), m in zip(coord, mass)]
1274
                    tmass = sum(mass)
1275
                    rr = sum(mi*i + mj*j + mk*k for (i, j, k), (mi, mj, mk) in zip(coord, xm))
1276
                    mm = sum((sum(i) / tmass)**2 for i in zip(*xm))
1277
                    rg = math.sqrt(rr / tmass-mm)
1278
                    return(round(rg, 3))
1279
1280
            def radius_of_gyration(N):
1281
                    return round(5.5 * (N ** (1/3)), 2)
1282
1283
            rg = calc_rg()
1284
            rgn = radius_of_gyration(N)
0 ignored issues
show
introduced by
The variable N does not seem to be defined in case the for loop on line 1207 is not entered. Are you sure this can never be the case?
Loading history...
1285
            print(f'radius of gyration: {rg}; expected for this seqlen ({N}): {rgn}')
1286
1287
    if args.renum_nmr:
1288
        if list != type(args.file):
1289
            args.file = [args.file]
1290
        txt = ''
1291
        for f in args.file:
1292
            c = 1
1293
            for l in open(f):
1294
                if l.startswith('MODEL'):
1295
                    txt += "MODEL       " + str(c) + '\n'
1296
                    c += 1
1297
                elif l.strip() == 'END':
1298
                    pass
1299
                else:
1300
                    txt += l
1301
        if args.inplace:
1302
            shutil.move(f, f.replace('.pdb', '.pdb~'))
0 ignored issues
show
introduced by
The variable f does not seem to be defined for all execution paths.
Loading history...
1303
            with open(f) as fi:
1304
                fi.write(txt)
1305
        else:
1306
            print(txt)
1307
            
1308
    from rna_tools.rna_tools_config import PYMOL_PATH
1309
    sys.path.insert(0, PYMOL_PATH)
1310 View Code Duplication
    if args.cif2pdb:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1311
        # quick fix - make a list on the spot
1312
        if list != type(args.file):
1313
            args.file = [args.file]
1314
        ##################################
1315
        for cif_file in args.file:
1316
            from Bio.PDB import MMCIFParser, PDBIO
1317
            parser = MMCIFParser()
1318
            structure = parser.get_structure("structure_id", cif_file)
1319
            pdb_file = cif_file.replace('.cif', '_fCIF.pdb')
1320
1321
1322
            try:
1323
                # Save to PDB format
1324
                io = PDBIO()
1325
                io.set_structure(structure)
1326
                io.save(pdb_file)
1327
1328
                print(f'saved: {pdb_file}')
1329
                # open a file add remarks
1330
                new_file = ''
1331
                with open(pdb_file, 'r') as f:
1332
                    if not args.no_hr:
1333
                        new_file += add_header(version) + '\n'
1334
                    new_file += f.read()
1335
1336
                with open(pdb_file, 'w') as f:
1337
                    f.write(new_file)
1338
1339
            except:
1340
                print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
1341
1342
                def has_high_rna_content(chain, threshold=0.8):
1343
                    # RNA nucleotides: A, C, G, U, and X (you can modify as needed)
1344
                    rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
1345
                    total_residues = 0
1346
                    rna_residues = 0
1347
1348
                    # Count the total number of residues and RNA-like residues
1349
                    for residue in chain:
1350
                        total_residues += 1
1351
                        if residue.get_resname().strip() in rna_nucleotides:
1352
                            rna_residues += 1
1353
1354
                    # Calculate the proportion of RNA residues
1355
                    if total_residues == 0:
1356
                        return False  # Avoid division by zero if chain has no residues
1357
1358
                    rna_percentage = rna_residues / total_residues
1359
1360
                    # Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
1361
                    return rna_percentage >= threshold
1362
1363
                from Bio.PDB.MMCIFParser import MMCIFParser
1364
                from Bio.PDB import MMCIFParser, Structure, Model, Chain
1365
                
1366
                # Initialize the parser
1367
                parser = MMCIFParser()
1368
1369
                # Parse the structure
1370
                structure = parser.get_structure("structure", cif_file)
1371
1372
                # Create a list of single-letter chain identifiers
1373
                import string
1374
                letters = list(string.ascii_uppercase)
1375
1376
                for model in structure:
1377
                    for chain in model:
1378
                        if has_high_rna_content(chain):
1379
                            # New structure
1380
                            new_structure = Structure.Structure("new_structure")
1381
                            new_model = Model.Model(0)  # Create a new model
1382
                            new_structure.add(new_model)  # Add the new model to the new structure
1383
1384
                            chain_id_new = letters.pop(0)
1385
                            chain_id = chain.get_id()
1386
1387
                            atom_count = 0
1388
                            for residue in chain:
1389
                                  for atom in residue:
1390
                                       atom_count += 1
1391
1392
                            remarks = []
1393
                            remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')
1394
1395
                            pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
1396
                            print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')
1397
1398
                            chain.id = chain_id_new
1399
                            new_model.add(chain)
1400
1401
                            io = PDBIO()
1402
                            io.set_structure(new_structure)
1403
                            
1404
                            io.save(pdb_file)
1405
                            # open a file add remarks
1406
                            new_file = ''
1407
                            with open(pdb_file, 'r') as f:
1408
                                if not args.no_hr:
1409
                                    new_file += add_header(version) + '\n'
1410
                                if remarks:
1411
                                    new_file += '\n'.join(remarks) + '\n'
1412
                                new_file += f.read()
1413
1414
                            with open(pdb_file, 'w') as f:
1415
                                f.write(new_file)
1416
1417
    if args.pdb2cif:
1418
        try:
1419
            from pymol import cmd
1420
        except ImportError:
1421
            print('This functionality needs PyMOL. Install it or if installed, check your setup')
1422
            sys.exit(1)
1423
1424
        # quick fix - make a list on the spot
1425
        if list != type(args.file):
1426
            args.file = [args.file]
1427
        ##################################
1428
        for f in args.file:
1429
            cmd.load(f)
1430
            fo = f.replace('.pdb', '.cif')
1431
            cmd.save(fo, '(all)')
1432
            cmd.delete('all')
1433
            print(fo, 'saved')
1434