Passed
Push — master ( 24af79...dfabdb )
by Marcin
06:15 queued 22s
created

build.rna_tools.rna_pdb_tools.get_pdb_header()   A

Complexity

Conditions 4

Size

Total Lines 13
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 11
nop 1
dl 0
loc 13
rs 9.85
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
"""rna_pdb_tools - a swiss army knife to manipulation of RNA pdb structures
5
6
Usage::
7
8
   $ rna_pdb_tools.py --delete A:46-56 --inplace *.pdb
9
10
    $ rna_pdb_tools.py --get-seq *
11
    # BujnickiLab_RNApuzzle14_n01bound
12
    > A:1-61
13
    # BujnickiLab_RNApuzzle14_n02bound
14
    > A:1-61
15
    CGUUAGCCCAGGAAACUGGGCGGAAGUAAGGCCCAUUGCACUCCGGGCCUGAAGCAACGCG
16
    [...]
17
18
See `rna_pdb_merge_into_one.py` to merge PDB files in the order as you like into one NMR-like (multimodel) file
19
20
Examples::
21
22
    rna_pdb_tools.py --backbone-only   --get-rnapuzzle-ready  --inplace --suffix=bo examples/4GXY_min.pdb
23
24
To extract specific atoms for each residue and write them to separate PDB file (next to the input files, following syntax "<input>_<resid>.pdb")::
25
26
    rna_pdb_tools.py --rpr input/4GXY_min.pdb --save-single-res --ref-frame-only
27
28
Atoms presets::
29
30
  --backbone-only       used only with --get-rnapuzzle-ready, keep only backbone (= remove bases)
31
  --ref-frame-only      used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P
32
  --no-backbone         used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5')
33
  --bases-only          used only with --get-rnapuzzle-ready, keep only atoms of bases
34
      
35
.. image:: ../pngs/276411138-236435ff-2944-4bec-ab75-dca0d1e3aacf.jpg
36
37
38
-v is for verbose, --version for version ;)
39
"""
40
import argparse
41
import textwrap
42
import os
43
import shutil
44
import sys
45
import tempfile
46
import glob
47
import os
48
from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version, \
49
                          collapsed_view, fetch, fetch_ba, fetch_cif, replace_chain, RNAStructure, \
50
                          select_pdb_fragment, sort_strings, set_chain_for_struc
51
from rna_tools.tools.rna_x3dna.rna_x3dna import x3DNA
52
53
54
def get_parser():
55
    version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__)
56
    version = version[1].strip()
57
    parser = argparse.ArgumentParser(
58
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
59
60
    parser.add_argument('--version', help='', action='version', version=version)
61
62
    parser.add_argument('-r', '--report', help='get report',
63
                        action='store_true')
64
65
    parser.add_argument('--no-progress-bar', help='for --no-progress-bar for --rpr', action='store_true')
66
67
    parser.add_argument('--renum-atoms', help='renumber atoms, tested with --get-seq',
68
                        action='store_true')
69
70
    parser.add_argument('--renum-nmr', help='',
71
                        action='store_true')
72
73
    parser.add_argument('--renum-residues-dirty', help='',  action='store_true')
74
75
    parser.add_argument('--undo', help='undo operation of action done --inplace, , rename "backup files" .pdb~ to pdb, ALL files in the folder, not only ~ related to the last action (that you might want to revert, so be careful)',  action='store_true')
76
77
    parser.add_argument('--delete-anisou', help='remove files with ANISOU records, works with --inplace',
78
                        action='store_true')
79
80
    parser.add_argument('--remove0', help='remove atoms of X=0 position',
81
                        action='store_true')
82
83
    parser.add_argument('--fix', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms',
84
                        action='store_true')
85
86
    parser.add_argument('--to-mol2', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms',
87
                        action='store_true')
88
89
    parser.add_argument('--split-alt-locations', help='splits atoms, e.g. for alt locs A and B, it splits atoms two MODELS (all localizations A goes into MODEL1 and all localizations B goes into MODEL2',
90
                        action='store_true')
91
92
    parser.add_argument('-c', '--clean', help='get clean structure',
93
                        action='store_true')
94
95
    parser.add_argument('--is-pdb', help='check if a file is in the pdb format',
96
                        action='store_true')
97
98
    parser.add_argument('--is-nmr', help='check if a file is NMR-style multiple model pdb',
99
                        action='store_true')
100
101
    parser.add_argument('--nmr-dir', help='make NMR-style multiple model pdb file from a set of files \n\n' +
102
                        "  rna_pdb_tools.py --nmr-dir . 'cwc15_u5_fragments*.pdb' > ~/Desktop/cwc15-u5.pdb\n\n" +
103
                        "please use '' for pattern file recognition, this is a hack to deal with folders with\n"
104
                        "thousands of models, if you used only *.pdb then the terminal will complain that you\n"
105
                        "selected to many files.")
106
107
    parser.add_argument('--un-nmr', help="""split NMR-style multiple model pdb files into individual models [biopython],
108
109
   rna_pdb_tools.py  --un-nmr  split.pdb
110
   2
111
   /Users/magnus/Desktop/3hl2/split_1.pdb
112
   /Users/magnus/Desktop/3hl2/split_2.pdb
113
114
""",
115
                        action='store_true')
116
117
    parser.add_argument('--orgmode', help='get a structure in org-mode format <sick!>',
118
                        action='store_true')
119
120
    parser.add_argument('--get-chain', help='get chain, one or many, e.g, A, but now also ABC works')
121
122
    parser.add_argument('--fetch', action='store_true', help='fetch file from the PDB db, e.g., 1xjr,\nuse \'rp\' to fetch, fetch a given join, 4w90:C or 4w90_C'  +
123
                        'the RNA-Puzzles standardized_dataset [around 100 MB]')
124
125
    parser.add_argument('--fetch-cif',  action='store_true', help='')
126
    
127
    parser.add_argument('--fetch-header', action='store_true', help='')
128
129
    parser.add_argument('--fetch-ba', action='store_true',
130
                        help='fetch biological assembly from the PDB db')
131
132
    parser.add_argument('--fetch-chain', action='store_true', help='fetch a structure in extract chain, e.g. 6bk8 H')
133
134
    parser.add_argument('--fetch-fasta', action='store_true', help='fetch a fasta/sequence for given PDB ID, e.g. 6bk8')
135
136
    parser.add_argument('--get-seq', help='get seq', action='store_true')
137
138
    parser.add_argument('--rgyration', help='get seq', action='store_true')
139
140
    parser.add_argument('--color-seq', help='color seq, works with --get-seq', action='store_true')
141
142
    parser.add_argument('--ignore-files', help='files to be ingored, .e.g, \'solution\'')
143
    
144
    parser.add_argument('--compact',
145
                        help=textwrap.dedent("""with --get-seq, get it in compact view'
146
$ rna_pdb_tools.py --get-seq --compact *.pdb
147
# 20_Bujnicki_1
148
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
149
# 20_Bujnicki_2
150
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
151
# 20_Bujnicki_3
152
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
153
# 20_Bujnicki_4
154
155
"""), action='store_true')
156
157
    parser.add_argument('--hide-warnings', help='hide warnings, works with --get-chain, it hides warnings that given changes are not detected in a PDB file', action='store_true')
158
159
    parser.add_argument('--get-ss', help='get secondary structure', action='store_true')
160
161
    parser.add_argument('--rosetta2generic', help='convert ROSETTA-like format to a generic pdb',
162
                        action='store_true')
163
164
    parser.add_argument('--no-hr', help='do not insert the header into files',
165
                        action='store_true')
166
167
    parser.add_argument('--renumber-residues', help='by defult is false',
168
                        action='store_true')
169
170
    parser.add_argument('--dont-rename-chains',
171
                        help=textwrap.dedent("""used only with --get-rnapuzzle-ready.
172
By default:
173
   --get-rnapuzzle-ready rename chains from ABC.. to stop behavior switch on this option
174
"""),
175
                        action='store_true')
176
177
    parser.add_argument('--dont-fix-missing-atoms',
178
                        help="""used only with --get-rnapuzzle-ready""",
179
                        action='store_true')
180
181
    parser.add_argument('--inspect',
182
                        help="inspect missing atoms (technically decorator to --get-rnapuzzle-ready without actually doing anything but giving a report on problems)",
183
                        action='store_true')
184
    
185
    parser.add_argument('--collapsed-view', help='',
186
                        action='store_true')
187
188
    parser.add_argument('--cv', help='alias to collapsed_view',
189
                        action='store_true')
190
191
    parser.add_argument('-v', '--verbose', help='tell me more what you\'re doing, please!',
192
                        action='store_true')
193
194
    parser.add_argument('--mutate', help=textwrap.dedent("""mutate residues,
195
e.g.,
196
      --mutate "A:1a+2a+3a+4a,B:1a"
197
to mutate to adenines the first four nucleotides of the chain A
198
and the first nucleotide of the chain B"""))
199
200
    parser.add_argument('--edit',
201
                        dest="edit",
202
                        default='',
203
                        help="edit 'A:6>B:200', 'A:2-7>B:2-7'")
204
205
    parser.add_argument('--rename-chain',
206
                        help="edit 'A>B' to rename chain A to chain B")
207
208
    parser.add_argument('--swap-chains', help='B>A, rename A to _, then B to A, then _ to B')
209
210
    parser.add_argument('--set-chain', help='set chain for all ATOM lines and TER (quite brutal function)')
211
212
    parser.add_argument('--replace-chain',
213
                        default='',
214
                        help=textwrap.dedent("""a file PDB name with one chain that will be used to
215
replace the chain in the original PDB file,
216
the chain id in this file has to be the same with the chain id of the original chain"""))
217
218
    parser.add_argument('--delete',  # type="string",
219
                        dest="delete",
220
                        default='',
221
                        help="delete the selected fragment, e.g. A:10-16, or for more than one fragment --delete 'A:1-25+30-57'")
222
223
    parser.add_argument('--extract',  # type="string",
224
                        dest="extract",
225
                        default='',
226
                        help="extract the selected fragment, e.g. A:10-16, or for more than one fragment --extract 'A:1-25+30-57', or even 'A:1-25+B:30-57'")
227
228
    parser.add_argument('--extract-chain',
229
                         help="extract chain, e.g. A")
230
231
    parser.add_argument('--uniq', help=textwrap.dedent("""
232
rna_pdb_tools.py --get-seq --uniq '[:5]' --compact --chain-first * | sort
233
A:1-121        ACCUUGCGCAACUGGCGAAUCCUGGGGCUGCCGCCGGCAGUACCC...CA # rp13nc3295_min.out.1
234
A:1-123        ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # rp13cp0016_min.out.1
235
A:1-123        ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # zcp_6537608a_ALL-000001_AA
236
A:1-45 57-71   GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCGCCGCCCGCCUGGGC # solution
237
"""))
238
239
    parser.add_argument('--chain-first', help="", action='store_true')
240
    parser.add_argument('--oneline', help="", action='store_true')
241
242
    parser.add_argument('--replace-htm', help="", action='store_true')
243
244
    parser.add_argument('--fasta',
245
                        help= textwrap.dedent("""with --get-seq, show sequences in fasta format,
246
can be combined with --compact (mind, chains will be separated with ' ' in one line)
247
248
$ rna_pdb_tools.py --get-seq --fasta --compact input/20_Bujnicki_1.pdb
249
> 20_Bujnicki_1
250
ACCCGCAAGGCCGACGGC GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
251
252
"""), action='store_true')
253
254
    parser.add_argument('--cif2pdb', help="convert cif to PDB, fancy way", action='store_true')
255
    parser.add_argument('--pdb2cif', help="[PyMOL Python package required]", action='store_true')
256
257
    parser.add_argument('--mdr', help='get structures ready for MD (like rpr but without first)',
258
                        action='store_true')
259
260
    x = parser.add_argument_group('RNAPUZZLE-READY')
261
    x.add_argument('--get-rnapuzzle-ready',
262
                        help=textwrap.dedent("""get RNApuzzle ready (keep only standard atoms).'
263
Be default it does not renumber residues, use --renumber-residues
264
[requires BioPython]"""), action='store_true')
265
266
    x.add_argument('--rpr', help='alias to get_rnapuzzle ready)',
267
                        action='store_true')
268
269
270
    rpr = parser.add_argument_group('CAN BE COMBINED WITH')# --get-rnapuzzle-ready (or --rpr) can be combined with')
271
272
    rpr.add_argument('--keep-hetatm', help='keep hetatoms', action='store_true')
273
274
    rpr.add_argument('--inplace', help=textwrap.dedent("""in place edit the file! [experimental,
275
only for get_rnapuzzle_ready, --delete, --get-ss, --get-seq, --edit-pdb]"""),
276
                        action='store_true')
277
278
    rpr.add_argument('--here', help=textwrap.dedent("""save a file next to the original file with auto suffix
279
for --extract it's .extr.pdb"""),
280
                        action='store_true')
281
282
    rpr.add_argument('--suffix', help=textwrap.dedent("""when used with --inplace allows you to change a name of a new file, --suffix del will give <file>_del.pdb (mind added _)"""))
283
284
    rpr.add_argument('--replace-hetatm', help="replace 'HETATM' with 'ATOM' [tested only with --get-rnapuzzle-ready]",
285
                        action="store_true")
286
287
    rpr.add_argument('--dont-report-missing-atoms',
288
                        help="""used only with --get-rnapuzzle-ready""",
289
                        action='store_true')
290
291
    rpr.add_argument('--backbone-only',
292
                        help="""used only with --get-rnapuzzle-ready, keep only backbone (= remove bases)""",
293
                        action='store_true')
294
295
    rpr.add_argument('--p-only',
296
                        help="""used only with --get-rnapuzzle-ready, keep p backbone (= remove bases)""",
297
                        action='store_true')
298
299
    rpr.add_argument('--ref-frame-only',
300
                        help="""used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P""",
301
                        action='store_true')
302
303
    rpr.add_argument('--no-backbone',
304
                        help="""used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5')""",
305
                        action='store_true')
306
307
    rpr.add_argument('--bases-only',
308
                        help="""used only with --get-rnapuzzle-ready, keep only atoms of bases""",
309
                        action='store_true')
310
311
    rpr.add_argument('--save-single-res',
312
                        help="""used only with --get-rnapuzzle-ready, for each residue create a new pdb output file, you can combine it with --bases-only etc.""",
313
                        action='store_true')
314
315
    parser.add_argument('file', help='file', nargs='+')
316
    #parser.add_argument('outfile', help='outfile')
317
    return parser
318
319
320
# main
321
if __name__ == '__main__':
322
    # get version
323
    version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__)
324
    version = version[1].strip()
325
326
    # get parser and arguments
327
    parser = get_parser()
328
329
    args = parser.parse_args()
330
331
    # quick fix for one files vs file-s
332
    if list == type(args.file) and len(args.file) == 1:
333
        args.file = args.file[0]
334
335
    if args.report:
336
        s = RNAStructure(args.file)
337
        print(s.get_report)
338
        print(s.get_preview())
339
        print(s.get_info_chains())
340
341
    if args.clean:
342
        s = RNAStructure(args.file)
343
        s.decap_gtp()
344
        s.std_resn()
345
        s.remove_hydrogen()
346
        s.remove_ion()
347
        s.remove_water()
348
        s.renum_atoms()
349
        s.fix_O_in_UC()
350
        s.fix_op_atoms()
351
        # print s.get_preview()
352
        # s.write(args.outfile)
353
        if not args.no_hr:
354
            print(add_header(version))
355
        print(s.get_text())
356
357
    if args.get_seq:
358
        # quick fix - make a list on the spot
359
        if list != type(args.file):
360
            args.file = [args.file]
361
        ##################################
362
        analyzed = []
363
        for f in args.file:
364
            #####################################
365
            if args.uniq:
366
                subname = eval('f' + args.uniq)
367
                if subname in analyzed:
368
                    continue
369
                else:
370
                    analyzed.append(subname)
371
            ########
372
            s = RNAStructure(f)
373
            if not s.is_pdb():
374
                print('Error: Not a PDB file %s' % f)
375
                sys.exit(1)
376
            s.decap_gtp()
377
            s.std_resn()
378
            s.remove_hydrogen()
379
            s.remove_ion()
380
            s.remove_water()
381
            if args.renum_atoms:
382
                s.renum_atoms()
383
            s.fix_O_in_UC()
384
            s.fix_op_atoms()
385
386
            output = ''
387
388
            # with # is easier to grep this out
389
            if args.fasta:
390
                # s.fn vs s.name
391
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n'
392
            elif args.oneline:
393
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n'
394
            else:
395
                output += '# ' + os.path.basename(f.replace('.pdb', '')) + '\n'
396
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq) + '\n'
397
398
            try:
399
                sys.stdout.write(output)
400
                sys.stdout.flush()
401
            except IOError:
402
                pass
403
404
    if args.get_ss:
405
        # quick fix - make a list on the spot
406
        if list != type(args.file):
407
            args.file = [args.file]
408
        ##################################
409
        for f in args.file:
410
            output = f + '\n'
411
            p = x3DNA(f)
412
            output += p.get_secstruc() + '\n'
413
            try:
414
                sys.stdout.write(output)
415
                sys.stdout.flush()
416
            except IOError:
417
                pass
418
419
    # getchain
420
    if args.get_chain:
421
        s = RNAStructure(args.file)
422
        ## s.std_resn()
423
        ## s.remove_hydrogen()
424
        ## s.remove_ion()
425
        ## s.remove_water()
426
        ## s.renum_atoms()
427
        ## s.fix_O_in_UC()
428
        ## s.fix_op_atoms()
429
        # print s.get_preview()
430
        warningmsg = ''
431
        for chain in list(args.get_chain):
432
            chain_txt = s.get_chain(chain)
433
            if not chain_txt.strip():
434
                warningmsg += 'Warning: Chain %s not detected!' % chain
435
            else:
436
                print(chain_txt)
437
        if not args.hide_warnings:
438
            if warningmsg:
439
                print(warningmsg)
440
441
    if args.rosetta2generic:
442
        s = RNAStructure(args.file)
443
        s.std_resn()
444
        s.remove_hydrogen()
445
        s.remove_ion()
446
        s.remove_water()
447
        s.fix_op_atoms()
448
        s.renum_atoms()
449
        # print s.get_preview()
450
        # s.write(args.outfile)
451
        if not args.no_hr:
452
            print(add_header(version))
453
        print(s.get_text())
454
455
    remarks_only = False
456
    if args.inspect:
457
        args.get_rnapuzzle_ready = True
458
        fix_missing_atom = False
459
        remarks_only = True
460
        args.dont_rename_chains = True
461
        args.renumber_residues = False
462
463
    if args.get_rnapuzzle_ready or args.rpr or args.mdr:  # rnapuzzle
464
        # quick fix - make a list on the spot
465
        if list != type(args.file):
466
            args.file = [args.file]
467
        ##################################
468
        # progress bar only in --inplace mode!
469
        if args.inplace:
470
            if not args.no_progress_bar:
471
                import progressbar
472
                bar = progressbar.ProgressBar(max_value=len(args.file))
473
                bar.update(0)
474
475
        for c, f in enumerate(args.file):
476
            if args.verbose:
477
                print(f)
478
            if args.inplace:
479
                shutil.copy(f, f + '~')
480
481
            # keep previous edits
482
            previous_edits = []
483
            with open(f) as fx:
484
                for l in fx:
485
                    if l.startswith('HEADER --'):
486
                        previous_edits.append(l.strip())
487
            ######################
488
489
            s = RNAStructure(f)
490
            if args.replace_hetatm:
491
                s.replace_hetatms()
492
493
            s.remove_hydrogen()
494
            s.decap_gtp()
495
            s.std_resn()
496
            s.fix_op_atoms()
497
498
            s.remove_ion()
499
            s.remove_water()
500
            # s.renum_atoms()
501
502
            s.shift_atom_names()
503
            s.prune_elements()
504
505
            # s.write('tmp.pdb')
506
            
507
            rename_chains = False if args.dont_rename_chains else True
508
509
            report_missing_atoms = not args.dont_report_missing_atoms
510
            fix_missing_atom = not args.dont_fix_missing_atoms
511
512
            ignore_op3 = False
513
            if args.mdr:
514
                ignore_op3 = True
515
                
516
            remarks = s.get_rnapuzzle_ready(args.renumber_residues, fix_missing_atoms=fix_missing_atom,
517
                                            rename_chains=rename_chains,
518
                                            report_missing_atoms=report_missing_atoms,
519
                                            backbone_only=args.backbone_only,
520
                                            no_backbone=args.no_backbone,
521
                                            bases_only=args.bases_only,
522
                                            p_only=args.p_only,
523
                                            keep_hetatm=args.keep_hetatm,
524
                                            ignore_op3=ignore_op3,
525
                                            save_single_res=args.save_single_res,
526
                                            ref_frame_only = args.ref_frame_only,
527
                                            verbose=args.verbose)
528
529
            if args.inplace:
530
                if args.suffix:
531
                    f = f.replace('.pdb', '_' + args.suffix + '.pdb')
532
                    if args.verbose: print(f)
533
                        
534
                with open(f, 'w') as f:
535
                    if not args.no_hr:
536
                        f.write(add_header(version) + '\n')
537
                    if previous_edits:
538
                        f.write('\n'.join(previous_edits) + '\n')
539
                    if remarks:
540
                        f.write('\n'.join(remarks) + '\n')
541
                    f.write(s.get_text())
542
                # progress bar only in --inplace mode!
543
                if not args.no_progress_bar:
544
                   bar.update(c)
0 ignored issues
show
introduced by
The variable bar does not seem to be defined for all execution paths.
Loading history...
545
546
            else:
547
                output = ''
548
                if not args.no_hr:
549
                    output += add_header(version) + '\n'
550
                if remarks:
551
                    output += '\n'.join(remarks) + '\n'
552
                output += s.get_text() + '\n'
553
554
                if remarks_only:
555
                    sys.stdout.write('\n'.join(remarks))
556
                    sys.stdout.flush()
557
                else:
558
                    if args.here:
559
                        if '_rpr' not in f:  # good idea?
560
                            nf = f.replace('.pdb', '_rpr.pdb')
561
                            with open(nf, 'w') as fio:
562
                                print(nf)
563
                                fio.write(output)
564
                    else:
565
                        try:
566
                            sys.stdout.write(output)
567
                            sys.stdout.flush()
568
                        except IOError:
569
                            pass
570
        # hmm... fix for problem with renumbering, i do renumbering
571
        # and i stop here
572
        # i'm not sure that this is perfect
573
        sys.exit(0)
574
575
576
    if args.renumber_residues:
577
        s = RNAStructure(args.file)
578
        s.remove_hydrogen()
579
        s.remove_ion()
580
        s.remove_water()
581
        s.get_rnapuzzle_ready(args.renumber_residues)
582
        s.renum_atoms()
583
        if not args.no_hr:
584
            print(add_header(version))
585
        print(s.get_text())
586
587
# args.renumber_resides_dirty
588
    if args.renum_residues_dirty:
589
        # quick fix - make a list on the spot
590
        if list != type(args.file):
591
            args.file = [args.file]
592
        ##################################
593
        for f in args.file:
594
            if args.inplace:
595
                shutil.copy(f, f + '~')
596
597
            s = RNAStructure(f)
598
599
            output = ''
600
            #if not args.no_hr:
601
            #    output += add_header(version) + '\n'
602
            #    output += 'HEADER --delete ' + args.delete + '\n'  # ' '.join(str(selection))
603
            c = 1
604
            old_resi = -1
605
            for l in s.lines:
606
                if l.startswith('ATOM') or l.startswith("HETATOM"):
607
                    resi = int(l[23:26].strip())
608
                    if resi != old_resi:
609
                        old_resi = resi
610
                        c += 1
611
                    # print(resi, c)
612
                    #resi = c
613
                    #if chain in selection:
614
                    #    if resi in selection[chain]:
615
                    #        continue  # print chain, resi
616
                    output += l[:23] + str(c).rjust(3) + l[26:] + '\n'
617
            # write: inplace
618
            if args.inplace:
619
                with open(f, 'w') as f:
620
                    f.write(output)
621
            else:  # write: to stdout
622
                try:
623
                    sys.stdout.write(output)
624
                    sys.stdout.flush()
625
                except IOError:
626
                    pass
627
628
629
    if args.undo:
630
        # quick fix - make a list on the spot
631
        dir = args.file #os.path.abso(os.path.dirname(args.file))
632
        for f in glob.glob(dir + '/*.pdb~'):
633
            if args.verbose:
634
                print(f, '->',f.replace('.pdb~', '.pdb'))
635
            os.rename(f, f.replace('.pdb~', '.pdb'))
636
637
    if args.delete:
638
        # quick fix - make a list on the spot
639
        if list != type(args.file):
640
            args.file = [args.file]
641
        ##################################
642
        for f in args.file:
643
            if args.inplace:
644
                shutil.copy(f, f + '~')
645
646
            selection = select_pdb_fragment(args.delete)
647
            s = RNAStructure(f)
648
649
            output = ''
650
            if not args.no_hr:
651
                output += add_header(version) + '\n'
652
                output += 'HEADER --delete ' + args.delete + '\n'  # ' '.join(str(selection))
653
            for l in s.lines:
654
                if l.startswith('ATOM'):
655
                    chain = l[21]
656
                    resi = int(l[23:26].strip())
657
                    if chain in selection:
658
                        if resi in selection[chain]:
659
                            continue  # print chain, resi
660
                    output += l + '\n'
661
662
            # write: inplace
663
            if args.inplace:
664
                os.rename(f, f.replace('.pdb', '.pdb~'))                
665
                if args.suffix:
666
                    f = f.replace('.pdb', '_' + args.suffix + '.pdb')
667
                with open(f, 'w') as f:
668
                    f.write(output)
669
            else:  # write: to stdout
670
                try:
671
                    sys.stdout.write(output)
672
                    sys.stdout.flush()
673
                except IOError:
674
                    pass
675
676
    if args.replace_chain:
677
        # quick fix - make a list on the spot
678
        if list != type(args.file):
679
            args.file = [args.file]
680
        ##################################
681
        for f in args.file:
682
            if args.inplace:
683
                shutil.copy(f, f + '~')
684
685
            # --replace_chain <file> without A:<file> it will be easier than --x "A:<file>"
686
            s = RNAStructure(args.replace_chain)
687
            chain_ids = (s.get_all_chain_ids())
688
            if len(chain_ids) > 1:
689
                raise Exception('There is more than one chain in the inserted PDB file. There should be only one chain, the one you want to insert to the PDB.')
690
            out = replace_chain(f, args.replace_chain, list(chain_ids)[0])
691
            print(out)
692
693
    if args.mutate:
694
        # quick fix - make a list on the spot
695
        if list != type(args.file):
696
            args.file = [args.file]
697
        ##################################
698
        from rna_tools.tools.mini_moderna3.moderna import *
699
700
        for f in args.file:
701
            if args.ignore_files:
702
                if args.ignore_files in f:
703
                    continue
704
                
705
            if args.inplace:
706
                shutil.copy(f, f + '~')  # create a backup copy if inplace
707
708
            # create a working copy of the main file
709
            ftf = tempfile.NamedTemporaryFile(delete=False).name  # f temp file
710
            shutil.copy(f, ftf)  # create a backup copy if inplace
711
712
            # go over each chain
713
            # rna_pdb_tools.py --mutate 'A:1CB:1G,A:1U+B:1A' CG_AB.pdb > ~/Desktop/a.pdb
714
            args.mutate = args.mutate.upper()
715
            for m in args.mutate.split(','):  # A:1A, B:1A
716
                chain, resi_mutate_to = m.strip().split(':')  # A:1A+2C
717
                resi_mutate_to_list = resi_mutate_to.split('+')  # A:1A+2C
718
719
                model = load_model(f, chain)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable load_model does not seem to be defined.
Loading history...
720
                # go over each mutation in this chain
721
                for resi_mutate_to in resi_mutate_to_list:
722
                    resi = resi_mutate_to[:-1]
723
                    mutate_to = resi_mutate_to[-1]
724
                    if args.verbose:
725
                        print('Mutate', model[resi], 'to', mutate_to)
726
                    exchange_single_base(model[resi], mutate_to)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable exchange_single_base does not seem to be defined.
Loading history...
727
728
                # multi mutation in one chain
729
                tf = tempfile.NamedTemporaryFile(delete=False)
730
                model.write_pdb_file(tf.name)
731
732
                # work on the copy of f, ftf
733
                output = replace_chain(ftf, tf.name, chain)
734
                with open(ftf, 'w') as tmp:
735
                    tmp.write(output)
736
737
            # write: inplace
738
            if args.inplace:
739
                # ftf now is f, get ready for the final output
740
                if args.suffix:
741
                    f = f.replace('.pdb', '_' + args.suffix + '.pdb')
742
                # rpr on the file
743
                shutil.copy(ftf, f)
744
                os.system('rna_pdb_tools.py --rpr --no-progress-bar --inplace ' + f)
745
            else:  # write: to stdout
746
                try:
747
                    sys.stdout.write(output)
0 ignored issues
show
introduced by
The variable output does not seem to be defined for all execution paths.
Loading history...
748
                    sys.stdout.flush()
749
                except IOError:
750
                    pass
751
752
753
# extract
754
    if args.extract:
755
        # quick fix - make a list on the spot
756
        if list != type(args.file):
757
            args.file = [args.file]
758
        ##################################
759
        for f in args.file:
760
            if args.inplace:
761
                shutil.copy(f, f + '~')
762
763
            selection = select_pdb_fragment(args.extract)
764
            s = RNAStructure(f)
765
766
            output = ''
767
            if not args.no_hr:
768
                output += add_header(version) + '\n'
769
                output += 'HEADER extract ' + args.extract + '\n'  # ' '.join(str(selection))
770
            for l in s.lines:
771
                if l.startswith('ATOM'):
772
                    chain = l[21]
773
                    resi = int(l[23:26].strip())
774
                    if chain in selection:
775
                        if resi in selection[chain]:
776
                            # continue  # print chain, resi
777
                            output += l + '\n'
778
779
            # write: inplace
780
            if args.inplace:
781
                with open(f, 'w') as f:
782
                    f.write(output)
783
            elif args.here:
784
                if '_extr' not in f:  # good idea?
785
                    nf = f.replace('.pdb', '_extr.pdb')
786
                    with open(nf, 'w') as fio:
787
                        print(nf)
788
                        fio.write(output)
789
            else:  # write: to stdout
790
                try:
791
                    sys.stdout.write(output)
792
                    sys.stdout.flush()
793
                except IOError:
794
                    pass
795
796
797
    if args.extract_chain:
798
        # quick fix - make a list on the spot
799
        if list != type(args.file):
800
            args.file = [args.file]
801
        ##################################
802
        for f in args.file:
803
            if args.inplace:
804
                shutil.copy(f, f + '~')
805
806
            selection = select_pdb_fragment(args.extract)
807
            s = RNAStructure(f)
808
809
            output = ''
810
            if not args.no_hr:
811
                output += add_header(version) + '\n'
812
                output += 'HEADER extract ' + args.extract + '\n'  # ' '.join(str(selection))
813
            for l in s.lines:
814
                if l.startswith('ATOM') or l.startswith('TER') or l.startswith('HETATM'):
815
                    chain = l[21]
816
                    if chain in args.extract_chain:
817
                        output += l + '\n'
818
819
            # write: inplace
820
            if args.inplace:
821
                with open(f, 'w') as f:
822
                    f.write(output)
823
            else:  # write: to stdout
824
                try:
825
                    sys.stdout.write(output)
826
                    sys.stdout.flush()
827
                except IOError:
828
                    pass
829
830
    if args.is_pdb:
831
        s = RNAStructure(args.file)
832
        output = str(s.is_pdb())
833
        sys.stdout.write(output + '\n')
834
835
    if args.un_nmr:
836
        s = RNAStructure(args.file)
837
        str(s.un_nmr())
838
839
    if args.is_nmr:
840
        struc = RNAStructure(args.file)
841
        output = str(struc.is_nmr(args.verbose))
842
        sys.stdout.write(output + '\n')
843
844
    #edit
845
    if args.edit:
846
        if list != type(args.file):
847
            args.file = [args.file]
848
        ##################################
849
        for f in args.file:
850
            if args.verbose:
851
                print(f)
852
            if args.inplace:
853
                shutil.copy(f, f + '~')
854
855
            output = edit_pdb(f, args)
856
857
            if args.inplace:
858
                with open(f, 'w') as f:
859
                    f.write(output)
860
            else:  # write: to stdout
861
                try:
862
                    sys.stdout.write(output)
863
                    sys.stdout.flush()
864
                except IOError:
865
                    pass
866
867
868
    if args.fetch:
869
        if list != type(args.file):
870
            args.file = [args.file]
871
        ##################################
872
        for f in args.file:
873
            fn = fetch(f)
874
            
875
    if args.fetch_fasta:
876
877
878
        pdb_id = args.file
879
        pdb_id = pdb_id.replace('.pdb', '')
880
881
        import urllib3
882
        http = urllib3.PoolManager()
883
884
        response = http.request('GET', 'https://www.rcsb.org/fasta/entry/' + pdb_id.upper())
885
        if not response.status == 200:
886
            raise PDBFetchError()
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable PDBFetchError does not seem to be defined.
Loading history...
887
        else:
888
            txt = response.data.decode("utf-8").strip()
889
            print(txt)            
890
            if 'No valid PDB IDs were submitted' in txt:
891
                pass
892
            else:
893
                with open(pdb_id + '.fa', 'w') as f:
894
                    f.write(txt)
895
896
        
897
    if args.fetch_chain:
898
        fn = fetch(args.file[0])
899
        chain = args.file[1]
900
        nfn = fn.replace('.pdb','') + '_' + chain + '.pdb' # 6bk8_H.pdb
901
        #cmd = 'rna_pdb_tools.py --get-chain %s %s' > %s' % (chain, fn, nfn)
902
        cmd = 'rna_pdb_tools.py --get-chain %s %s' % (chain, fn)
903
        os.system(cmd)
904
        # print(nfn)
905
906
    if args.fetch_ba:
907
        fetch_ba(args.file)
908
909
    if args.fetch_header:
910
911
        import requests
912
        if list != type(args.file):
913
            args.file = [args.file]
914
        ##################################
915
916
917
        def get_pdb_header(pdb_id):
918
            url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
919
            response = requests.get(url)
920
            response.raise_for_status()  # Check if the request was successful
921
922
            header_lines = []
923
            for line in response.text.splitlines():
924
                if line.startswith("TITLE"): #or line.startswith("COMPND") or line.startswith("SOURCE"):
925
                    header_lines.append(line)
926
                elif line.startswith("ATOM"):  # Stop once ATOM records begin
927
                    break
928
            
929
            return " ".join([l.replace('TITLE', '').strip() for l in header_lines])
930
931
        for f in args.file:
932
                f = f.replace('.pdb', '').replace('.cif', '')
933
                pdb_id = f.split('_')[0] # _A.cif
934
                header = get_pdb_header(pdb_id)
935
                print(f'{pdb_id} {header}')
936
937
    if args.fetch_cif:
938
        if list != type(args.file):
939
            args.file = [args.file]
940
        ##################################
941
        for f in args.file:
942
            fetch_cif(f)
943
944
    if args.collapsed_view or args.cv:
945
        collapsed_view(args)
946
947
    if args.delete_anisou:
948
        # quick fix - make a list on the spot
949
        if list != type(args.file):
950
            args.file = [args.file]
951
        ##################################
952
        for f in args.file:
953
            if args.inplace:
954
                shutil.copy(f, f + '~')
955
956
            s = RNAStructure(f)
957
958
            output = ''
959
            if not args.no_hr:
960
                output += add_header(version) + '\n'
961
962
            for l in s.lines:
963
                if l.startswith('ANISOU'):
964
                    continue
965
                else:
966
                    output += l + '\n'
967
968
            if args.inplace:
969
                with open(f, 'w') as f:
970
                    f.write(output)
971
            else:  # write: to stdout
972
                try:
973
                    sys.stdout.write(output)
974
                    sys.stdout.flush()
975
                except IOError:
976
                    pass
977
978
979
    if args.swap_chains:
980
        # quick fix - make a list on the spot
981
        if list != type(args.file):
982
            args.file = [args.file]
983
        ##################################
984
        for f in args.file:
985
            if args.inplace:
986
                shutil.copy(f, f + '~')
987
            # rename_chain 'A>B'
988
            s = RNAStructure(f)
989
            output = ''
990
            if not args.no_hr:
991
                output += add_header(version) + '\n'
992
993
            chain_id_old, chain_id_new = args.swap_chains.split('>')
994
            if chain_id_old == chain_id_new:
995
                output = open(f).read() # return the file ;-) itself unchanged
996
            else:
997
                s.rename_chain(chain_id_new, '_')
998
                s.rename_chain(chain_id_old, chain_id_new)
999
                output += s.rename_chain('_', chain_id_old)
1000
1001
            if args.inplace:
1002
                with open(f, 'w') as f:
1003
                    f.write(output)
1004
            else:  # write: to stdout
1005
                try:
1006
                    sys.stdout.write(output)
1007
                    sys.stdout.flush()
1008
                except IOError:
1009
                    pass
1010
1011
    if args.rename_chain:
1012
        # quick fix - make a list on the spot
1013
        if list != type(args.file):
1014
            args.file = [args.file]
1015
        ##################################
1016
        for f in args.file:
1017
            if args.inplace:
1018
                shutil.copy(f, f + '~')
1019
            # rename_chain 'A>B'
1020
            s = RNAStructure(f)
1021
            chain_id_old, chain_id_new = args.rename_chain.split('>')
1022
            output = ''
1023
            if not args.no_hr:
1024
                output += add_header(version) + '\n'
1025
            output += s.rename_chain(chain_id_old, chain_id_new)
1026
            if args.inplace:
1027
                with open(f, 'w') as f:
1028
                    f.write(output)
1029
            else:  # write: to stdout
1030
                try:
1031
                    sys.stdout.write(output)
1032
                    sys.stdout.flush()
1033
                except IOError:
1034
                    pass
1035
1036
1037
    if args.split_alt_locations:
1038
        # quick fix - make a list on the spot
1039
        if list != type(args.file):
1040
            args.file = [args.file]
1041
        ##################################
1042
        for f in args.file:
1043
            if args.inplace:
1044
                shutil.copy(f, f + '~')
1045
1046
            #s = RNAStructure(f)
1047
            s = open(f)
1048
            output = ''
1049
            if not args.no_hr:
1050
                output += add_header(version) + '\n'
1051
1052
            # First, collect all alternative locations.
1053
            alts = set()
1054
            for l in s:
1055
                if l.startswith('ATOM'):
1056
                    if l[16].strip():
1057
                        alts.add(l[16])
1058
            s.close()
1059
1060
            if args.verbose:
1061
                print('alts:', alts)
1062
1063
            for index, alt in enumerate(alts):
1064
                output += 'MODEL %i' % (index + 1)
1065
                s = open(f)
1066
                for l in s:
1067
                    if l.startswith('ATOM') or l.startswith('HETATM'):
1068
                        # if empty, then print this line
1069
                        if l[16] == ' ':
1070
                            output += l
1071
                        if l[16] == alt:
1072
                            output += l
1073
                    else:
1074
                        output += l
1075
                output += 'ENDMDL\n'
1076
                s.close()
1077
1078
            if args.inplace:
1079
                with open(f, 'w') as f:
1080
                    f.write(output)
1081
            else:  # write: to stdout
1082
                try:
1083
                    sys.stdout.write(output)
1084
                    sys.stdout.flush()
1085
                except IOError:
1086
                    pass
1087
1088
    if args.orgmode:
1089
        if args.inplace:
1090
            shutil.copy(args.file, args.file + '~')
1091
        s = RNAStructure(args.file)
1092
        s.decap_gtp()
1093
        s.std_resn()
1094
        s.remove_hydrogen()
1095
        s.remove_ion()
1096
        s.remove_water()
1097
        s.fix_op_atoms()
1098
        s.renum_atoms()
1099
        s.shift_atom_names()
1100
        s.prune_elements()
1101
        # print s.get_preview()
1102
        # s.write(args.outfile)
1103
        # for l in s.lines:
1104
        #    print l
1105
1106
        remarks = s.get_rnapuzzle_ready(
1107
            args.renumber_residues, fix_missing_atoms=True, rename_chains=True, verbose=args.verbose)
1108
1109
        with open(args.file + '~', 'w') as f:
1110
            if not args.no_hr:
1111
                f.write(add_header(version) + '\n')
1112
1113
            f.write('\n'.join(remarks) + '\n')
1114
            f.write(s.get_text())
1115
1116
        try:
1117
            from Bio import PDB
1118
            from Bio.PDB import PDBIO
1119
            import warnings
1120
            warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
1121
            warnings.filterwarnings('ignore', '.*with given element *',)
1122
        except:
1123
            sys.exit('Error: Install biopython to use this function (pip biopython)')
1124
1125
        parser = PDB.PDBParser()
1126
        struct = parser.get_structure('', args.file + '~')
1127
        model = struct[0]
1128
1129
        # chains [['A', 'seq', [residues]]]
1130
        chains = []
1131
        for c in model.get_list():
1132
            seq = ''
1133
            chain = []
1134
            for r in c:
1135
                chain.append(str(r.get_resname().strip()) + str(r.id[1]))
1136
                seq += r.get_resname().strip()
1137
            chains.append([c.id, seq, chain])
1138
1139
        t = []
1140
        #[['A', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']], ['B', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']]]
1141
        for c in chains:
1142
            t.append('* ' + c[0] + ':' + c[2][0][1:] + '-' + c[2][-1][1:] + ' ' + c[1])
1143
            for r in c[2]:
1144
                t.append('** ' + c[0] + ':' + r)
1145
        print('\n'.join(t))
1146
1147
    if args.fix:
1148
        # quick fix - make a list on the spot
1149
        if list != type(args.file):
1150
            args.file = [args.file]
1151
        ##################################
1152
        for f in args.file:
1153
            cmd = 'pdbfixer ' + f + ' --add-atoms all --add-residues'
1154
            print(cmd)
1155
            os.system(cmd)
1156
            if args.inplace:
1157
                shutil.move("output.pdb", f)
1158
            else:
1159
                shutil.move("output.pdb", f.replace('.pdb', '_fx.pdb'))
1160
                            
1161
    if args.to_mol2:
1162
        # quick fix - make a list on the spot
1163
        if list != type(args.file):
1164
            args.file = [args.file]
1165
        ##################################
1166
        for f in args.file:
1167
            cmd = 'obabel -i pdb ' + f + ' -o mol2 -O ' + f.replace('.pdb', '.mol2')
1168
            print(cmd)
1169
            os.system(cmd)
1170
1171
1172
    if args.nmr_dir:
1173
        files = sort_strings(glob.glob(args.nmr_dir + '/' + args.file))
1174
1175
        c = 1
1176
1177
        for f in files:
1178
            #s = RNAStructure(f)
1179
            print("MODEL        " + str(c))
1180
            # at some point I could use RNAStructure for this
1181
            print(open(f).read())
1182
            #print(s.get_text(add_end=False))
1183
            print('ENDMDL')
1184
            c += 1
1185
        print('END')
1186
1187
    if args.set_chain:
1188
        if list != type(args.file):
1189
            args.file = [args.file]
1190
1191
        for f in args.file:
1192
            txt = set_chain_for_struc(f, args.set_chain)
1193
            if args.inplace:
1194
                shutil.move(f, f.replace('.pdb', '.pdb~'))
1195
                with open(f, 'w') as fi:
1196
                    fi.write(txt)
1197
            else:
1198
                print(txt)
1199
1200
    if args.remove0:
1201
        if list != type(args.file):
1202
            args.file = [args.file]
1203
1204
        for f in args.file:
1205
            s = RNAStructure(f)
1206
            txt = ''
1207
            for l in s.lines:
1208
                if s.get_atom_coords(l) == (0.0, 0.0, 0.0):
1209
                    continue
1210
                else:
1211
                    txt += l + '\n'
1212
1213
            if args.inplace:
1214
                shutil.move(f, f.replace('.pdb', '.pdb~'))
1215
                with open(f, 'w') as fi:
1216
                    fi.write(txt)
1217
            else:
1218
                print(txt)
1219
1220
    if args.replace_htm:
1221
        if list != type(args.file):
1222
            args.file = [args.file]
1223
1224
        for f in args.file:
1225
            with open(f) as fn:
1226
                txt = fn.read()
1227
            txt = txt.replace('HETATM', 'ATOM  ')
1228
            if args.inplace:
1229
                shutil.move(f, f.replace('.pdb', '.pdb~'))
1230
                with open(f, 'w') as fi:
1231
                    fi.write(txt)
1232
            else:
1233
                print(txt)
1234
1235
    if args.rgyration:
1236
        if list != type(args.file):
1237
            args.file = [args.file]
1238
        # quick fix - make a list on the spot
1239
        if list != type(args.file):
1240
            args.file = [args.file]
1241
        ##################################
1242
        analyzed = []
1243
        for f in args.file:
1244
            #####################################
1245
            if args.uniq:
1246
                subname = eval('f' + args.uniq)
1247
                if subname in analyzed:
1248
                    continue
1249
                else:
1250
                    analyzed.append(subname)
1251
1252
            s = RNAStructure(f)
1253
            if not s.is_pdb():
1254
                print('Error: Not a PDB file %s' % f)
1255
                sys.exit(1)
1256
            s.decap_gtp()
1257
            s.std_resn()
1258
            s.remove_hydrogen()
1259
            s.remove_ion()
1260
            s.remove_water()
1261
            if args.renum_atoms:
1262
                s.renum_atoms()
1263
            s.fix_O_in_UC()
1264
            s.fix_op_atoms()
1265
1266
            output = ''
1267
1268
            # with # is easier to grep this out
1269
            if args.fasta:
1270
                # s.fn vs s.name
1271
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n'
1272
            elif args.oneline:
1273
                output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n'
1274
            else:
1275
                N = len(s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq))
1276
                output += '' + os.path.basename(f.replace('.pdb', '')).ljust(60) + ' seq len: ' + str(N) + ' '
1277
            try:
1278
                sys.stdout.write(output)
1279
                sys.stdout.flush()
1280
            except IOError:
1281
                pass
1282
1283
            import math
1284
            def calc_rg():
1285
                    """modified after https://github.com/sarisabban/Rg"""
1286
                    coord = list()
1287
                    mass = list()
1288
                    for line in s.lines:
1289
                            try:
1290
                                    line = line.split()
1291
                                    x = float(line[6])
1292
                                    y = float(line[7])
1293
                                    z = float(line[8])
1294
                                    coord.append([x, y, z])
1295
                                    atom = line[2][0]
1296
                                    if atom == 'C':
1297
                                            mass.append(12.0107)
1298
                                    elif atom == 'O':
1299
                                            mass.append(15.9994)
1300
                                    elif atom == 'N':
1301
                                            mass.append(14.0067)
1302
                                    elif atom == 'S':
1303
                                            mass.append(32.065)
1304
                                    elif atom == 'P':
1305
                                            mass.append(30.97)
1306
                            except:
1307
                                    pass
1308
1309
                    xm = [(m*i, m*j, m*k) for (i, j, k), m in zip(coord, mass)]
1310
                    tmass = sum(mass)
1311
                    rr = sum(mi*i + mj*j + mk*k for (i, j, k), (mi, mj, mk) in zip(coord, xm))
1312
                    mm = sum((sum(i) / tmass)**2 for i in zip(*xm))
1313
                    rg = math.sqrt(rr / tmass-mm)
1314
                    return(round(rg, 3))
1315
1316
            def radius_of_gyration(N):
1317
                    return round(5.5 * (N ** (1/3)), 2)
1318
1319
            rg = calc_rg()
1320
            rgn = radius_of_gyration(N)
0 ignored issues
show
introduced by
The variable N does not seem to be defined in case the for loop on line 1243 is not entered. Are you sure this can never be the case?
Loading history...
1321
            print(f'radius of gyration: {rg}; expected for this seqlen ({N}): {rgn}, {round(rg / rgn * 100, 2)}%')
1322
1323
    if args.renum_nmr:
1324
        if list != type(args.file):
1325
            args.file = [args.file]
1326
        txt = ''
1327
        for f in args.file:
1328
            c = 1
1329
            for l in open(f):
1330
                if l.startswith('MODEL'):
1331
                    txt += "MODEL       " + str(c) + '\n'
1332
                    c += 1
1333
                elif l.strip() == 'END':
1334
                    pass
1335
                else:
1336
                    txt += l
1337
        if args.inplace:
1338
            shutil.move(f, f.replace('.pdb', '.pdb~'))
0 ignored issues
show
introduced by
The variable f does not seem to be defined for all execution paths.
Loading history...
1339
            with open(f) as fi:
1340
                fi.write(txt)
1341
        else:
1342
            print(txt)
1343
            
1344
    from rna_tools.rna_tools_config import PYMOL_PATH
1345
    sys.path.insert(0, PYMOL_PATH)
1346 View Code Duplication
    if args.cif2pdb:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1347
        # quick fix - make a list on the spot
1348
        if list != type(args.file):
1349
            args.file = [args.file]
1350
        ##################################
1351
        for cif_file in args.file:
1352
            from Bio.PDB import MMCIFParser, PDBIO
1353
            parser = MMCIFParser()
1354
            structure = parser.get_structure("structure_id", cif_file)
1355
            pdb_file = cif_file.replace('.cif', '_fCIF.pdb')
1356
1357
1358
            try:
1359
                # Save to PDB format
1360
                io = PDBIO()
1361
                io.set_structure(structure)
1362
                io.save(pdb_file)
1363
1364
                print(f'saved: {pdb_file}')
1365
                # open a file add remarks
1366
                new_file = ''
1367
                with open(pdb_file, 'r') as f:
1368
                    if not args.no_hr:
1369
                        new_file += add_header(version) + '\n'
1370
                    new_file += f.read()
1371
1372
                with open(pdb_file, 'w') as f:
1373
                    f.write(new_file)
1374
1375
            except:
1376
                print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
1377
1378
                def has_high_rna_content(chain, threshold=0.8):
1379
                    # RNA nucleotides: A, C, G, U, and X (you can modify as needed)
1380
                    rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
1381
                    total_residues = 0
1382
                    rna_residues = 0
1383
1384
                    # Count the total number of residues and RNA-like residues
1385
                    for residue in chain:
1386
                        total_residues += 1
1387
                        if residue.get_resname().strip() in rna_nucleotides:
1388
                            rna_residues += 1
1389
1390
                    # Calculate the proportion of RNA residues
1391
                    if total_residues == 0:
1392
                        return False  # Avoid division by zero if chain has no residues
1393
1394
                    rna_percentage = rna_residues / total_residues
1395
1396
                    # Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
1397
                    return rna_percentage >= threshold
1398
1399
                from Bio.PDB.MMCIFParser import MMCIFParser
1400
                from Bio.PDB import MMCIFParser, Structure, Model, Chain
1401
                
1402
                # Initialize the parser
1403
                parser = MMCIFParser()
1404
1405
                # Parse the structure
1406
                structure = parser.get_structure("structure", cif_file)
1407
1408
                # Create a list of single-letter chain identifiers
1409
                import string
1410
                letters = list(string.ascii_uppercase)
1411
1412
                for model in structure:
1413
                    for chain in model:
1414
                        if has_high_rna_content(chain):
1415
                            # New structure
1416
                            new_structure = Structure.Structure("new_structure")
1417
                            new_model = Model.Model(0)  # Create a new model
1418
                            new_structure.add(new_model)  # Add the new model to the new structure
1419
1420
                            chain_id_new = letters.pop(0)
1421
                            chain_id = chain.get_id()
1422
1423
                            atom_count = 0
1424
                            for residue in chain:
1425
                                  for atom in residue:
1426
                                       atom_count += 1
1427
1428
                            remarks = []
1429
                            remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')
1430
1431
                            pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
1432
                            print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')
1433
1434
                            chain.id = chain_id_new
1435
                            new_model.add(chain)
1436
1437
                            io = PDBIO()
1438
                            io.set_structure(new_structure)
1439
                            
1440
                            io.save(pdb_file)
1441
                            # open a file add remarks
1442
                            new_file = ''
1443
                            with open(pdb_file, 'r') as f:
1444
                                if not args.no_hr:
1445
                                    new_file += add_header(version) + '\n'
1446
                                if remarks:
1447
                                    new_file += '\n'.join(remarks) + '\n'
1448
                                new_file += f.read()
1449
1450
                            with open(pdb_file, 'w') as f:
1451
                                f.write(new_file)
1452
1453
    if args.pdb2cif:
1454
        try:
1455
            from pymol import cmd
1456
        except ImportError:
1457
            print('This functionality needs PyMOL. Install it or if installed, check your setup')
1458
            sys.exit(1)
1459
1460
        # quick fix - make a list on the spot
1461
        if list != type(args.file):
1462
            args.file = [args.file]
1463
        ##################################
1464
        for f in args.file:
1465
            cmd.load(f)
1466
            fo = f.replace('.pdb', '.cif')
1467
            cmd.save(fo, '(all)')
1468
            cmd.delete('all')
1469
            print(fo, 'saved')
1470