1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
|
4
|
|
|
"""rna_pdb_tools - a swiss army knife to manipulation of RNA pdb structures |
5
|
|
|
|
6
|
|
|
Usage:: |
7
|
|
|
|
8
|
|
|
$ rna_pdb_tools.py --delete A:46-56 --inplace *.pdb |
9
|
|
|
|
10
|
|
|
$ rna_pdb_tools.py --get-seq * |
11
|
|
|
# BujnickiLab_RNApuzzle14_n01bound |
12
|
|
|
> A:1-61 |
13
|
|
|
# BujnickiLab_RNApuzzle14_n02bound |
14
|
|
|
> A:1-61 |
15
|
|
|
CGUUAGCCCAGGAAACUGGGCGGAAGUAAGGCCCAUUGCACUCCGGGCCUGAAGCAACGCG |
16
|
|
|
[...] |
17
|
|
|
|
18
|
|
|
See `rna_pdb_merge_into_one.py` to merge PDB files in the order as you like into one NMR-like (multimodel) file |
19
|
|
|
|
20
|
|
|
Examples:: |
21
|
|
|
|
22
|
|
|
rna_pdb_tools.py --backbone-only --get-rnapuzzle-ready --inplace --suffix=bo examples/4GXY_min.pdb |
23
|
|
|
|
24
|
|
|
To extract specific atoms for each residue and write them to separate PDB file (next to the input files, following syntax "<input>_<resid>.pdb"):: |
25
|
|
|
|
26
|
|
|
rna_pdb_tools.py --rpr input/4GXY_min.pdb --save-single-res --ref-frame-only |
27
|
|
|
|
28
|
|
|
Atoms presets:: |
29
|
|
|
|
30
|
|
|
--backbone-only used only with --get-rnapuzzle-ready, keep only backbone (= remove bases) |
31
|
|
|
--ref-frame-only used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P |
32
|
|
|
--no-backbone used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5') |
33
|
|
|
--bases-only used only with --get-rnapuzzle-ready, keep only atoms of bases |
34
|
|
|
|
35
|
|
|
.. image:: ../pngs/276411138-236435ff-2944-4bec-ab75-dca0d1e3aacf.jpg |
36
|
|
|
|
37
|
|
|
|
38
|
|
|
-v is for verbose, --version for version ;) |
39
|
|
|
""" |
40
|
|
|
import argparse |
41
|
|
|
import textwrap |
42
|
|
|
import os |
43
|
|
|
import shutil |
44
|
|
|
import sys |
45
|
|
|
import tempfile |
46
|
|
|
import glob |
47
|
|
|
import os |
48
|
|
|
from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version, \ |
49
|
|
|
collapsed_view, fetch, fetch_ba, fetch_cif, replace_chain, RNAStructure, \ |
50
|
|
|
select_pdb_fragment, sort_strings, set_chain_for_struc |
51
|
|
|
from rna_tools.tools.rna_x3dna.rna_x3dna import x3DNA |
52
|
|
|
|
53
|
|
|
|
54
|
|
|
def get_parser(): |
55
|
|
|
version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__) |
56
|
|
|
version = version[1].strip() |
57
|
|
|
parser = argparse.ArgumentParser( |
58
|
|
|
description=__doc__, formatter_class=argparse.RawTextHelpFormatter) |
59
|
|
|
|
60
|
|
|
parser.add_argument('--version', help='', action='version', version=version) |
61
|
|
|
|
62
|
|
|
parser.add_argument('-r', '--report', help='get report', |
63
|
|
|
action='store_true') |
64
|
|
|
|
65
|
|
|
parser.add_argument('--no-progress-bar', help='for --no-progress-bar for --rpr', action='store_true') |
66
|
|
|
|
67
|
|
|
parser.add_argument('--renum-atoms', help='renumber atoms, tested with --get-seq', |
68
|
|
|
action='store_true') |
69
|
|
|
|
70
|
|
|
parser.add_argument('--renum-nmr', help='', |
71
|
|
|
action='store_true') |
72
|
|
|
|
73
|
|
|
parser.add_argument('--renum-residues-dirty', help='', action='store_true') |
74
|
|
|
|
75
|
|
|
parser.add_argument('--undo', help='undo operation of action done --inplace, , rename "backup files" .pdb~ to pdb, ALL files in the folder, not only ~ related to the last action (that you might want to revert, so be careful)', action='store_true') |
76
|
|
|
|
77
|
|
|
parser.add_argument('--delete-anisou', help='remove files with ANISOU records, works with --inplace', |
78
|
|
|
action='store_true') |
79
|
|
|
|
80
|
|
|
parser.add_argument('--remove0', help='remove atoms of X=0 position', |
81
|
|
|
action='store_true') |
82
|
|
|
|
83
|
|
|
parser.add_argument('--fix', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms', |
84
|
|
|
action='store_true') |
85
|
|
|
|
86
|
|
|
parser.add_argument('--to-mol2', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms', |
87
|
|
|
action='store_true') |
88
|
|
|
|
89
|
|
|
parser.add_argument('--split-alt-locations', help='splits atoms, e.g. for alt locs A and B, it splits atoms two MODELS (all localizations A goes into MODEL1 and all localizations B goes into MODEL2', |
90
|
|
|
action='store_true') |
91
|
|
|
|
92
|
|
|
parser.add_argument('-c', '--clean', help='get clean structure', |
93
|
|
|
action='store_true') |
94
|
|
|
|
95
|
|
|
parser.add_argument('--is-pdb', help='check if a file is in the pdb format', |
96
|
|
|
action='store_true') |
97
|
|
|
|
98
|
|
|
parser.add_argument('--is-nmr', help='check if a file is NMR-style multiple model pdb', |
99
|
|
|
action='store_true') |
100
|
|
|
|
101
|
|
|
parser.add_argument('--nmr-dir', help='make NMR-style multiple model pdb file from a set of files \n\n' + |
102
|
|
|
" rna_pdb_tools.py --nmr-dir . 'cwc15_u5_fragments*.pdb' > ~/Desktop/cwc15-u5.pdb\n\n" + |
103
|
|
|
"please use '' for pattern file recognition, this is a hack to deal with folders with\n" |
104
|
|
|
"thousands of models, if you used only *.pdb then the terminal will complain that you\n" |
105
|
|
|
"selected to many files.") |
106
|
|
|
|
107
|
|
|
parser.add_argument('--un-nmr', help="""split NMR-style multiple model pdb files into individual models [biopython], |
108
|
|
|
|
109
|
|
|
rna_pdb_tools.py --un-nmr split.pdb |
110
|
|
|
2 |
111
|
|
|
/Users/magnus/Desktop/3hl2/split_1.pdb |
112
|
|
|
/Users/magnus/Desktop/3hl2/split_2.pdb |
113
|
|
|
|
114
|
|
|
""", |
115
|
|
|
action='store_true') |
116
|
|
|
|
117
|
|
|
parser.add_argument('--orgmode', help='get a structure in org-mode format <sick!>', |
118
|
|
|
action='store_true') |
119
|
|
|
|
120
|
|
|
parser.add_argument('--get-chain', help='get chain, one or many, e.g, A, but now also ABC works') |
121
|
|
|
|
122
|
|
|
parser.add_argument('--fetch', action='store_true', help='fetch file from the PDB db, e.g., 1xjr,\nuse \'rp\' to fetch, fetch a given join, 4w90:C or 4w90_C' + |
123
|
|
|
'the RNA-Puzzles standardized_dataset [around 100 MB]') |
124
|
|
|
|
125
|
|
|
parser.add_argument('--fetch-cif', action='store_true', help='') |
126
|
|
|
|
127
|
|
|
parser.add_argument('--fetch-header', action='store_true', help='') |
128
|
|
|
|
129
|
|
|
parser.add_argument('--fetch-ba', action='store_true', |
130
|
|
|
help='fetch biological assembly from the PDB db') |
131
|
|
|
|
132
|
|
|
parser.add_argument('--fetch-chain', action='store_true', help='fetch a structure in extract chain, e.g. 6bk8 H') |
133
|
|
|
|
134
|
|
|
parser.add_argument('--fetch-fasta', action='store_true', help='fetch a fasta/sequence for given PDB ID, e.g. 6bk8') |
135
|
|
|
|
136
|
|
|
parser.add_argument('--get-seq', help='get seq', action='store_true') |
137
|
|
|
|
138
|
|
|
parser.add_argument('--rgyration', help='get seq', action='store_true') |
139
|
|
|
|
140
|
|
|
parser.add_argument('--color-seq', help='color seq, works with --get-seq', action='store_true') |
141
|
|
|
|
142
|
|
|
parser.add_argument('--ignore-files', help='files to be ingored, .e.g, \'solution\'') |
143
|
|
|
|
144
|
|
|
parser.add_argument('--compact', |
145
|
|
|
help=textwrap.dedent("""with --get-seq, get it in compact view' |
146
|
|
|
$ rna_pdb_tools.py --get-seq --compact *.pdb |
147
|
|
|
# 20_Bujnicki_1 |
148
|
|
|
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68 |
149
|
|
|
# 20_Bujnicki_2 |
150
|
|
|
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68 |
151
|
|
|
# 20_Bujnicki_3 |
152
|
|
|
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68 |
153
|
|
|
# 20_Bujnicki_4 |
154
|
|
|
|
155
|
|
|
"""), action='store_true') |
156
|
|
|
|
157
|
|
|
parser.add_argument('--hide-warnings', help='hide warnings, works with --get-chain, it hides warnings that given changes are not detected in a PDB file', action='store_true') |
158
|
|
|
|
159
|
|
|
parser.add_argument('--get-ss', help='get secondary structure', action='store_true') |
160
|
|
|
|
161
|
|
|
parser.add_argument('--rosetta2generic', help='convert ROSETTA-like format to a generic pdb', |
162
|
|
|
action='store_true') |
163
|
|
|
|
164
|
|
|
parser.add_argument('--no-hr', help='do not insert the header into files', |
165
|
|
|
action='store_true') |
166
|
|
|
|
167
|
|
|
parser.add_argument('--renumber-residues', help='by defult is false', |
168
|
|
|
action='store_true') |
169
|
|
|
|
170
|
|
|
parser.add_argument('--dont-rename-chains', |
171
|
|
|
help=textwrap.dedent("""used only with --get-rnapuzzle-ready. |
172
|
|
|
By default: |
173
|
|
|
--get-rnapuzzle-ready rename chains from ABC.. to stop behavior switch on this option |
174
|
|
|
"""), |
175
|
|
|
action='store_true') |
176
|
|
|
|
177
|
|
|
parser.add_argument('--dont-fix-missing-atoms', |
178
|
|
|
help="""used only with --get-rnapuzzle-ready""", |
179
|
|
|
action='store_true') |
180
|
|
|
|
181
|
|
|
parser.add_argument('--inspect', |
182
|
|
|
help="inspect missing atoms (technically decorator to --get-rnapuzzle-ready without actually doing anything but giving a report on problems)", |
183
|
|
|
action='store_true') |
184
|
|
|
|
185
|
|
|
parser.add_argument('--collapsed-view', help='', |
186
|
|
|
action='store_true') |
187
|
|
|
|
188
|
|
|
parser.add_argument('--cv', help='alias to collapsed_view', |
189
|
|
|
action='store_true') |
190
|
|
|
|
191
|
|
|
parser.add_argument('-v', '--verbose', help='tell me more what you\'re doing, please!', |
192
|
|
|
action='store_true') |
193
|
|
|
|
194
|
|
|
parser.add_argument('--mutate', help=textwrap.dedent("""mutate residues, |
195
|
|
|
e.g., |
196
|
|
|
--mutate "A:1a+2a+3a+4a,B:1a" |
197
|
|
|
to mutate to adenines the first four nucleotides of the chain A |
198
|
|
|
and the first nucleotide of the chain B""")) |
199
|
|
|
|
200
|
|
|
parser.add_argument('--edit', |
201
|
|
|
dest="edit", |
202
|
|
|
default='', |
203
|
|
|
help="edit 'A:6>B:200', 'A:2-7>B:2-7'") |
204
|
|
|
|
205
|
|
|
parser.add_argument('--rename-chain', |
206
|
|
|
help="edit 'A>B' to rename chain A to chain B") |
207
|
|
|
|
208
|
|
|
parser.add_argument('--swap-chains', help='B>A, rename A to _, then B to A, then _ to B') |
209
|
|
|
|
210
|
|
|
parser.add_argument('--set-chain', help='set chain for all ATOM lines and TER (quite brutal function)') |
211
|
|
|
|
212
|
|
|
parser.add_argument('--replace-chain', |
213
|
|
|
default='', |
214
|
|
|
help=textwrap.dedent("""a file PDB name with one chain that will be used to |
215
|
|
|
replace the chain in the original PDB file, |
216
|
|
|
the chain id in this file has to be the same with the chain id of the original chain""")) |
217
|
|
|
|
218
|
|
|
parser.add_argument('--delete', # type="string", |
219
|
|
|
dest="delete", |
220
|
|
|
default='', |
221
|
|
|
help="delete the selected fragment, e.g. A:10-16, or for more than one fragment --delete 'A:1-25+30-57'") |
222
|
|
|
|
223
|
|
|
parser.add_argument('--extract', # type="string", |
224
|
|
|
dest="extract", |
225
|
|
|
default='', |
226
|
|
|
help="extract the selected fragment, e.g. A:10-16, or for more than one fragment --extract 'A:1-25+30-57', or even 'A:1-25+B:30-57'") |
227
|
|
|
|
228
|
|
|
parser.add_argument('--extract-chain', |
229
|
|
|
help="extract chain, e.g. A") |
230
|
|
|
|
231
|
|
|
parser.add_argument('--uniq', help=textwrap.dedent(""" |
232
|
|
|
rna_pdb_tools.py --get-seq --uniq '[:5]' --compact --chain-first * | sort |
233
|
|
|
A:1-121 ACCUUGCGCAACUGGCGAAUCCUGGGGCUGCCGCCGGCAGUACCC...CA # rp13nc3295_min.out.1 |
234
|
|
|
A:1-123 ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # rp13cp0016_min.out.1 |
235
|
|
|
A:1-123 ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # zcp_6537608a_ALL-000001_AA |
236
|
|
|
A:1-45 57-71 GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCGCCGCCCGCCUGGGC # solution |
237
|
|
|
""")) |
238
|
|
|
|
239
|
|
|
parser.add_argument('--chain-first', help="", action='store_true') |
240
|
|
|
parser.add_argument('--oneline', help="", action='store_true') |
241
|
|
|
|
242
|
|
|
parser.add_argument('--replace-htm', help="", action='store_true') |
243
|
|
|
|
244
|
|
|
parser.add_argument('--fasta', |
245
|
|
|
help= textwrap.dedent("""with --get-seq, show sequences in fasta format, |
246
|
|
|
can be combined with --compact (mind, chains will be separated with ' ' in one line) |
247
|
|
|
|
248
|
|
|
$ rna_pdb_tools.py --get-seq --fasta --compact input/20_Bujnicki_1.pdb |
249
|
|
|
> 20_Bujnicki_1 |
250
|
|
|
ACCCGCAAGGCCGACGGC GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU |
251
|
|
|
|
252
|
|
|
"""), action='store_true') |
253
|
|
|
|
254
|
|
|
parser.add_argument('--cif2pdb', help="convert cif to PDB, fancy way", action='store_true') |
255
|
|
|
parser.add_argument('--pdb2cif', help="[PyMOL Python package required]", action='store_true') |
256
|
|
|
|
257
|
|
|
parser.add_argument('--mdr', help='get structures ready for MD (like rpr but without first)', |
258
|
|
|
action='store_true') |
259
|
|
|
|
260
|
|
|
x = parser.add_argument_group('RNAPUZZLE-READY') |
261
|
|
|
x.add_argument('--get-rnapuzzle-ready', |
262
|
|
|
help=textwrap.dedent("""get RNApuzzle ready (keep only standard atoms).' |
263
|
|
|
Be default it does not renumber residues, use --renumber-residues |
264
|
|
|
[requires BioPython]"""), action='store_true') |
265
|
|
|
|
266
|
|
|
x.add_argument('--rpr', help='alias to get_rnapuzzle ready)', |
267
|
|
|
action='store_true') |
268
|
|
|
|
269
|
|
|
|
270
|
|
|
rpr = parser.add_argument_group('CAN BE COMBINED WITH')# --get-rnapuzzle-ready (or --rpr) can be combined with') |
271
|
|
|
|
272
|
|
|
rpr.add_argument('--keep-hetatm', help='keep hetatoms', action='store_true') |
273
|
|
|
|
274
|
|
|
rpr.add_argument('--inplace', help=textwrap.dedent("""in place edit the file! [experimental, |
275
|
|
|
only for get_rnapuzzle_ready, --delete, --get-ss, --get-seq, --edit-pdb]"""), |
276
|
|
|
action='store_true') |
277
|
|
|
|
278
|
|
|
rpr.add_argument('--here', help=textwrap.dedent("""save a file next to the original file with auto suffix |
279
|
|
|
for --extract it's .extr.pdb"""), |
280
|
|
|
action='store_true') |
281
|
|
|
|
282
|
|
|
rpr.add_argument('--suffix', help=textwrap.dedent("""when used with --inplace allows you to change a name of a new file, --suffix del will give <file>_del.pdb (mind added _)""")) |
283
|
|
|
|
284
|
|
|
rpr.add_argument('--replace-hetatm', help="replace 'HETATM' with 'ATOM' [tested only with --get-rnapuzzle-ready]", |
285
|
|
|
action="store_true") |
286
|
|
|
|
287
|
|
|
rpr.add_argument('--dont-report-missing-atoms', |
288
|
|
|
help="""used only with --get-rnapuzzle-ready""", |
289
|
|
|
action='store_true') |
290
|
|
|
|
291
|
|
|
rpr.add_argument('--backbone-only', |
292
|
|
|
help="""used only with --get-rnapuzzle-ready, keep only backbone (= remove bases)""", |
293
|
|
|
action='store_true') |
294
|
|
|
|
295
|
|
|
rpr.add_argument('--p-only', |
296
|
|
|
help="""used only with --get-rnapuzzle-ready, keep p backbone (= remove bases)""", |
297
|
|
|
action='store_true') |
298
|
|
|
|
299
|
|
|
rpr.add_argument('--ref-frame-only', |
300
|
|
|
help="""used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P""", |
301
|
|
|
action='store_true') |
302
|
|
|
|
303
|
|
|
rpr.add_argument('--no-backbone', |
304
|
|
|
help="""used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5')""", |
305
|
|
|
action='store_true') |
306
|
|
|
|
307
|
|
|
rpr.add_argument('--bases-only', |
308
|
|
|
help="""used only with --get-rnapuzzle-ready, keep only atoms of bases""", |
309
|
|
|
action='store_true') |
310
|
|
|
|
311
|
|
|
rpr.add_argument('--save-single-res', |
312
|
|
|
help="""used only with --get-rnapuzzle-ready, for each residue create a new pdb output file, you can combine it with --bases-only etc.""", |
313
|
|
|
action='store_true') |
314
|
|
|
|
315
|
|
|
parser.add_argument('file', help='file', nargs='+') |
316
|
|
|
#parser.add_argument('outfile', help='outfile') |
317
|
|
|
return parser |
318
|
|
|
|
319
|
|
|
|
320
|
|
|
# main |
321
|
|
|
if __name__ == '__main__': |
322
|
|
|
# get version |
323
|
|
|
version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__) |
324
|
|
|
version = version[1].strip() |
325
|
|
|
|
326
|
|
|
# get parser and arguments |
327
|
|
|
parser = get_parser() |
328
|
|
|
|
329
|
|
|
args = parser.parse_args() |
330
|
|
|
|
331
|
|
|
# quick fix for one files vs file-s |
332
|
|
|
if list == type(args.file) and len(args.file) == 1: |
333
|
|
|
args.file = args.file[0] |
334
|
|
|
|
335
|
|
|
if args.report: |
336
|
|
|
s = RNAStructure(args.file) |
337
|
|
|
print(s.get_report) |
338
|
|
|
print(s.get_preview()) |
339
|
|
|
print(s.get_info_chains()) |
340
|
|
|
|
341
|
|
|
if args.clean: |
342
|
|
|
s = RNAStructure(args.file) |
343
|
|
|
s.decap_gtp() |
344
|
|
|
s.std_resn() |
345
|
|
|
s.remove_hydrogen() |
346
|
|
|
s.remove_ion() |
347
|
|
|
s.remove_water() |
348
|
|
|
s.renum_atoms() |
349
|
|
|
s.fix_O_in_UC() |
350
|
|
|
s.fix_op_atoms() |
351
|
|
|
# print s.get_preview() |
352
|
|
|
# s.write(args.outfile) |
353
|
|
|
if not args.no_hr: |
354
|
|
|
print(add_header(version)) |
355
|
|
|
print(s.get_text()) |
356
|
|
|
|
357
|
|
|
if args.get_seq: |
358
|
|
|
# quick fix - make a list on the spot |
359
|
|
|
if list != type(args.file): |
360
|
|
|
args.file = [args.file] |
361
|
|
|
################################## |
362
|
|
|
analyzed = [] |
363
|
|
|
for f in args.file: |
364
|
|
|
##################################### |
365
|
|
|
if args.uniq: |
366
|
|
|
subname = eval('f' + args.uniq) |
367
|
|
|
if subname in analyzed: |
368
|
|
|
continue |
369
|
|
|
else: |
370
|
|
|
analyzed.append(subname) |
371
|
|
|
######## |
372
|
|
|
s = RNAStructure(f) |
373
|
|
|
if not s.is_pdb(): |
374
|
|
|
print('Error: Not a PDB file %s' % f) |
375
|
|
|
sys.exit(1) |
376
|
|
|
s.decap_gtp() |
377
|
|
|
s.std_resn() |
378
|
|
|
s.remove_hydrogen() |
379
|
|
|
s.remove_ion() |
380
|
|
|
s.remove_water() |
381
|
|
|
if args.renum_atoms: |
382
|
|
|
s.renum_atoms() |
383
|
|
|
s.fix_O_in_UC() |
384
|
|
|
s.fix_op_atoms() |
385
|
|
|
|
386
|
|
|
output = '' |
387
|
|
|
|
388
|
|
|
# with # is easier to grep this out |
389
|
|
|
if args.fasta: |
390
|
|
|
# s.fn vs s.name |
391
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n' |
392
|
|
|
elif args.oneline: |
393
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n' |
394
|
|
|
else: |
395
|
|
|
output += '# ' + os.path.basename(f.replace('.pdb', '')) + '\n' |
396
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq) + '\n' |
397
|
|
|
|
398
|
|
|
try: |
399
|
|
|
sys.stdout.write(output) |
400
|
|
|
sys.stdout.flush() |
401
|
|
|
except IOError: |
402
|
|
|
pass |
403
|
|
|
|
404
|
|
|
if args.get_ss: |
405
|
|
|
# quick fix - make a list on the spot |
406
|
|
|
if list != type(args.file): |
407
|
|
|
args.file = [args.file] |
408
|
|
|
################################## |
409
|
|
|
for f in args.file: |
410
|
|
|
output = f + '\n' |
411
|
|
|
p = x3DNA(f) |
412
|
|
|
output += p.get_secstruc() + '\n' |
413
|
|
|
try: |
414
|
|
|
sys.stdout.write(output) |
415
|
|
|
sys.stdout.flush() |
416
|
|
|
except IOError: |
417
|
|
|
pass |
418
|
|
|
|
419
|
|
|
# getchain |
420
|
|
|
if args.get_chain: |
421
|
|
|
s = RNAStructure(args.file) |
422
|
|
|
## s.std_resn() |
423
|
|
|
## s.remove_hydrogen() |
424
|
|
|
## s.remove_ion() |
425
|
|
|
## s.remove_water() |
426
|
|
|
## s.renum_atoms() |
427
|
|
|
## s.fix_O_in_UC() |
428
|
|
|
## s.fix_op_atoms() |
429
|
|
|
# print s.get_preview() |
430
|
|
|
warningmsg = '' |
431
|
|
|
for chain in list(args.get_chain): |
432
|
|
|
chain_txt = s.get_chain(chain) |
433
|
|
|
if not chain_txt.strip(): |
434
|
|
|
warningmsg += 'Warning: Chain %s not detected!' % chain |
435
|
|
|
else: |
436
|
|
|
print(chain_txt) |
437
|
|
|
if not args.hide_warnings: |
438
|
|
|
if warningmsg: |
439
|
|
|
print(warningmsg) |
440
|
|
|
|
441
|
|
|
if args.rosetta2generic: |
442
|
|
|
s = RNAStructure(args.file) |
443
|
|
|
s.std_resn() |
444
|
|
|
s.remove_hydrogen() |
445
|
|
|
s.remove_ion() |
446
|
|
|
s.remove_water() |
447
|
|
|
s.fix_op_atoms() |
448
|
|
|
s.renum_atoms() |
449
|
|
|
# print s.get_preview() |
450
|
|
|
# s.write(args.outfile) |
451
|
|
|
if not args.no_hr: |
452
|
|
|
print(add_header(version)) |
453
|
|
|
print(s.get_text()) |
454
|
|
|
|
455
|
|
|
remarks_only = False |
456
|
|
|
if args.inspect: |
457
|
|
|
args.get_rnapuzzle_ready = True |
458
|
|
|
fix_missing_atom = False |
459
|
|
|
remarks_only = True |
460
|
|
|
args.dont_rename_chains = True |
461
|
|
|
args.renumber_residues = False |
462
|
|
|
|
463
|
|
|
if args.get_rnapuzzle_ready or args.rpr or args.mdr: # rnapuzzle |
464
|
|
|
# quick fix - make a list on the spot |
465
|
|
|
if list != type(args.file): |
466
|
|
|
args.file = [args.file] |
467
|
|
|
################################## |
468
|
|
|
# progress bar only in --inplace mode! |
469
|
|
|
if args.inplace: |
470
|
|
|
if not args.no_progress_bar: |
471
|
|
|
import progressbar |
472
|
|
|
bar = progressbar.ProgressBar(max_value=len(args.file)) |
473
|
|
|
bar.update(0) |
474
|
|
|
|
475
|
|
|
for c, f in enumerate(args.file): |
476
|
|
|
if args.verbose: |
477
|
|
|
print(f) |
478
|
|
|
if args.inplace: |
479
|
|
|
shutil.copy(f, f + '~') |
480
|
|
|
|
481
|
|
|
# keep previous edits |
482
|
|
|
previous_edits = [] |
483
|
|
|
with open(f) as fx: |
484
|
|
|
for l in fx: |
485
|
|
|
if l.startswith('HEADER --'): |
486
|
|
|
previous_edits.append(l.strip()) |
487
|
|
|
###################### |
488
|
|
|
|
489
|
|
|
s = RNAStructure(f) |
490
|
|
|
if args.replace_hetatm: |
491
|
|
|
s.replace_hetatms() |
492
|
|
|
|
493
|
|
|
s.remove_hydrogen() |
494
|
|
|
s.decap_gtp() |
495
|
|
|
s.std_resn() |
496
|
|
|
s.fix_op_atoms() |
497
|
|
|
|
498
|
|
|
s.remove_ion() |
499
|
|
|
s.remove_water() |
500
|
|
|
# s.renum_atoms() |
501
|
|
|
|
502
|
|
|
s.shift_atom_names() |
503
|
|
|
s.prune_elements() |
504
|
|
|
|
505
|
|
|
# s.write('tmp.pdb') |
506
|
|
|
|
507
|
|
|
rename_chains = False if args.dont_rename_chains else True |
508
|
|
|
|
509
|
|
|
report_missing_atoms = not args.dont_report_missing_atoms |
510
|
|
|
fix_missing_atom = not args.dont_fix_missing_atoms |
511
|
|
|
|
512
|
|
|
ignore_op3 = False |
513
|
|
|
if args.mdr: |
514
|
|
|
ignore_op3 = True |
515
|
|
|
|
516
|
|
|
remarks = s.get_rnapuzzle_ready(args.renumber_residues, fix_missing_atoms=fix_missing_atom, |
517
|
|
|
rename_chains=rename_chains, |
518
|
|
|
report_missing_atoms=report_missing_atoms, |
519
|
|
|
backbone_only=args.backbone_only, |
520
|
|
|
no_backbone=args.no_backbone, |
521
|
|
|
bases_only=args.bases_only, |
522
|
|
|
p_only=args.p_only, |
523
|
|
|
keep_hetatm=args.keep_hetatm, |
524
|
|
|
ignore_op3=ignore_op3, |
525
|
|
|
save_single_res=args.save_single_res, |
526
|
|
|
ref_frame_only = args.ref_frame_only, |
527
|
|
|
verbose=args.verbose) |
528
|
|
|
|
529
|
|
|
if args.inplace: |
530
|
|
|
if args.suffix: |
531
|
|
|
f = f.replace('.pdb', '_' + args.suffix + '.pdb') |
532
|
|
|
if args.verbose: print(f) |
533
|
|
|
|
534
|
|
|
with open(f, 'w') as f: |
535
|
|
|
if not args.no_hr: |
536
|
|
|
f.write(add_header(version) + '\n') |
537
|
|
|
if previous_edits: |
538
|
|
|
f.write('\n'.join(previous_edits) + '\n') |
539
|
|
|
if remarks: |
540
|
|
|
f.write('\n'.join(remarks) + '\n') |
541
|
|
|
f.write(s.get_text()) |
542
|
|
|
# progress bar only in --inplace mode! |
543
|
|
|
if not args.no_progress_bar: |
544
|
|
|
bar.update(c) |
|
|
|
|
545
|
|
|
|
546
|
|
|
else: |
547
|
|
|
output = '' |
548
|
|
|
if not args.no_hr: |
549
|
|
|
output += add_header(version) + '\n' |
550
|
|
|
if remarks: |
551
|
|
|
output += '\n'.join(remarks) + '\n' |
552
|
|
|
output += s.get_text() + '\n' |
553
|
|
|
|
554
|
|
|
if remarks_only: |
555
|
|
|
sys.stdout.write('\n'.join(remarks)) |
556
|
|
|
sys.stdout.flush() |
557
|
|
|
else: |
558
|
|
|
if args.here: |
559
|
|
|
if '_rpr' not in f: # good idea? |
560
|
|
|
nf = f.replace('.pdb', '_rpr.pdb') |
561
|
|
|
with open(nf, 'w') as fio: |
562
|
|
|
print(nf) |
563
|
|
|
fio.write(output) |
564
|
|
|
else: |
565
|
|
|
try: |
566
|
|
|
sys.stdout.write(output) |
567
|
|
|
sys.stdout.flush() |
568
|
|
|
except IOError: |
569
|
|
|
pass |
570
|
|
|
# hmm... fix for problem with renumbering, i do renumbering |
571
|
|
|
# and i stop here |
572
|
|
|
# i'm not sure that this is perfect |
573
|
|
|
sys.exit(0) |
574
|
|
|
|
575
|
|
|
|
576
|
|
|
if args.renumber_residues: |
577
|
|
|
s = RNAStructure(args.file) |
578
|
|
|
s.remove_hydrogen() |
579
|
|
|
s.remove_ion() |
580
|
|
|
s.remove_water() |
581
|
|
|
s.get_rnapuzzle_ready(args.renumber_residues) |
582
|
|
|
s.renum_atoms() |
583
|
|
|
if not args.no_hr: |
584
|
|
|
print(add_header(version)) |
585
|
|
|
print(s.get_text()) |
586
|
|
|
|
587
|
|
|
# args.renumber_resides_dirty |
588
|
|
|
if args.renum_residues_dirty: |
589
|
|
|
# quick fix - make a list on the spot |
590
|
|
|
if list != type(args.file): |
591
|
|
|
args.file = [args.file] |
592
|
|
|
################################## |
593
|
|
|
for f in args.file: |
594
|
|
|
if args.inplace: |
595
|
|
|
shutil.copy(f, f + '~') |
596
|
|
|
|
597
|
|
|
s = RNAStructure(f) |
598
|
|
|
|
599
|
|
|
output = '' |
600
|
|
|
#if not args.no_hr: |
601
|
|
|
# output += add_header(version) + '\n' |
602
|
|
|
# output += 'HEADER --delete ' + args.delete + '\n' # ' '.join(str(selection)) |
603
|
|
|
c = 1 |
604
|
|
|
old_resi = -1 |
605
|
|
|
for l in s.lines: |
606
|
|
|
if l.startswith('ATOM') or l.startswith("HETATOM"): |
607
|
|
|
resi = int(l[23:26].strip()) |
608
|
|
|
if resi != old_resi: |
609
|
|
|
old_resi = resi |
610
|
|
|
c += 1 |
611
|
|
|
# print(resi, c) |
612
|
|
|
#resi = c |
613
|
|
|
#if chain in selection: |
614
|
|
|
# if resi in selection[chain]: |
615
|
|
|
# continue # print chain, resi |
616
|
|
|
output += l[:23] + str(c).rjust(3) + l[26:] + '\n' |
617
|
|
|
# write: inplace |
618
|
|
|
if args.inplace: |
619
|
|
|
with open(f, 'w') as f: |
620
|
|
|
f.write(output) |
621
|
|
|
else: # write: to stdout |
622
|
|
|
try: |
623
|
|
|
sys.stdout.write(output) |
624
|
|
|
sys.stdout.flush() |
625
|
|
|
except IOError: |
626
|
|
|
pass |
627
|
|
|
|
628
|
|
|
|
629
|
|
|
if args.undo: |
630
|
|
|
# quick fix - make a list on the spot |
631
|
|
|
dir = args.file #os.path.abso(os.path.dirname(args.file)) |
632
|
|
|
for f in glob.glob(dir + '/*.pdb~'): |
633
|
|
|
if args.verbose: |
634
|
|
|
print(f, '->',f.replace('.pdb~', '.pdb')) |
635
|
|
|
os.rename(f, f.replace('.pdb~', '.pdb')) |
636
|
|
|
|
637
|
|
|
if args.delete: |
638
|
|
|
# quick fix - make a list on the spot |
639
|
|
|
if list != type(args.file): |
640
|
|
|
args.file = [args.file] |
641
|
|
|
################################## |
642
|
|
|
for f in args.file: |
643
|
|
|
if args.inplace: |
644
|
|
|
shutil.copy(f, f + '~') |
645
|
|
|
|
646
|
|
|
selection = select_pdb_fragment(args.delete) |
647
|
|
|
s = RNAStructure(f) |
648
|
|
|
|
649
|
|
|
output = '' |
650
|
|
|
if not args.no_hr: |
651
|
|
|
output += add_header(version) + '\n' |
652
|
|
|
output += 'HEADER --delete ' + args.delete + '\n' # ' '.join(str(selection)) |
653
|
|
|
for l in s.lines: |
654
|
|
|
if l.startswith('ATOM'): |
655
|
|
|
chain = l[21] |
656
|
|
|
resi = int(l[23:26].strip()) |
657
|
|
|
if chain in selection: |
658
|
|
|
if resi in selection[chain]: |
659
|
|
|
continue # print chain, resi |
660
|
|
|
output += l + '\n' |
661
|
|
|
|
662
|
|
|
# write: inplace |
663
|
|
|
if args.inplace: |
664
|
|
|
os.rename(f, f.replace('.pdb', '.pdb~')) |
665
|
|
|
if args.suffix: |
666
|
|
|
f = f.replace('.pdb', '_' + args.suffix + '.pdb') |
667
|
|
|
with open(f, 'w') as f: |
668
|
|
|
f.write(output) |
669
|
|
|
else: # write: to stdout |
670
|
|
|
try: |
671
|
|
|
sys.stdout.write(output) |
672
|
|
|
sys.stdout.flush() |
673
|
|
|
except IOError: |
674
|
|
|
pass |
675
|
|
|
|
676
|
|
|
if args.replace_chain: |
677
|
|
|
# quick fix - make a list on the spot |
678
|
|
|
if list != type(args.file): |
679
|
|
|
args.file = [args.file] |
680
|
|
|
################################## |
681
|
|
|
for f in args.file: |
682
|
|
|
if args.inplace: |
683
|
|
|
shutil.copy(f, f + '~') |
684
|
|
|
|
685
|
|
|
# --replace_chain <file> without A:<file> it will be easier than --x "A:<file>" |
686
|
|
|
s = RNAStructure(args.replace_chain) |
687
|
|
|
chain_ids = (s.get_all_chain_ids()) |
688
|
|
|
if len(chain_ids) > 1: |
689
|
|
|
raise Exception('There is more than one chain in the inserted PDB file. There should be only one chain, the one you want to insert to the PDB.') |
690
|
|
|
out = replace_chain(f, args.replace_chain, list(chain_ids)[0]) |
691
|
|
|
print(out) |
692
|
|
|
|
693
|
|
|
if args.mutate: |
694
|
|
|
# quick fix - make a list on the spot |
695
|
|
|
if list != type(args.file): |
696
|
|
|
args.file = [args.file] |
697
|
|
|
################################## |
698
|
|
|
from rna_tools.tools.mini_moderna3.moderna import * |
699
|
|
|
|
700
|
|
|
for f in args.file: |
701
|
|
|
if args.ignore_files: |
702
|
|
|
if args.ignore_files in f: |
703
|
|
|
continue |
704
|
|
|
|
705
|
|
|
if args.inplace: |
706
|
|
|
shutil.copy(f, f + '~') # create a backup copy if inplace |
707
|
|
|
|
708
|
|
|
# create a working copy of the main file |
709
|
|
|
ftf = tempfile.NamedTemporaryFile(delete=False).name # f temp file |
710
|
|
|
shutil.copy(f, ftf) # create a backup copy if inplace |
711
|
|
|
|
712
|
|
|
# go over each chain |
713
|
|
|
# rna_pdb_tools.py --mutate 'A:1CB:1G,A:1U+B:1A' CG_AB.pdb > ~/Desktop/a.pdb |
714
|
|
|
args.mutate = args.mutate.upper() |
715
|
|
|
for m in args.mutate.split(','): # A:1A, B:1A |
716
|
|
|
chain, resi_mutate_to = m.strip().split(':') # A:1A+2C |
717
|
|
|
resi_mutate_to_list = resi_mutate_to.split('+') # A:1A+2C |
718
|
|
|
|
719
|
|
|
model = load_model(f, chain) |
|
|
|
|
720
|
|
|
# go over each mutation in this chain |
721
|
|
|
for resi_mutate_to in resi_mutate_to_list: |
722
|
|
|
resi = resi_mutate_to[:-1] |
723
|
|
|
mutate_to = resi_mutate_to[-1] |
724
|
|
|
if args.verbose: |
725
|
|
|
print('Mutate', model[resi], 'to', mutate_to) |
726
|
|
|
exchange_single_base(model[resi], mutate_to) |
|
|
|
|
727
|
|
|
|
728
|
|
|
# multi mutation in one chain |
729
|
|
|
tf = tempfile.NamedTemporaryFile(delete=False) |
730
|
|
|
model.write_pdb_file(tf.name) |
731
|
|
|
|
732
|
|
|
# work on the copy of f, ftf |
733
|
|
|
output = replace_chain(ftf, tf.name, chain) |
734
|
|
|
with open(ftf, 'w') as tmp: |
735
|
|
|
tmp.write(output) |
736
|
|
|
|
737
|
|
|
# write: inplace |
738
|
|
|
if args.inplace: |
739
|
|
|
# ftf now is f, get ready for the final output |
740
|
|
|
if args.suffix: |
741
|
|
|
f = f.replace('.pdb', '_' + args.suffix + '.pdb') |
742
|
|
|
# rpr on the file |
743
|
|
|
shutil.copy(ftf, f) |
744
|
|
|
os.system('rna_pdb_tools.py --rpr --no-progress-bar --inplace ' + f) |
745
|
|
|
else: # write: to stdout |
746
|
|
|
try: |
747
|
|
|
sys.stdout.write(output) |
|
|
|
|
748
|
|
|
sys.stdout.flush() |
749
|
|
|
except IOError: |
750
|
|
|
pass |
751
|
|
|
|
752
|
|
|
|
753
|
|
|
# extract |
754
|
|
|
if args.extract: |
755
|
|
|
# quick fix - make a list on the spot |
756
|
|
|
if list != type(args.file): |
757
|
|
|
args.file = [args.file] |
758
|
|
|
################################## |
759
|
|
|
for f in args.file: |
760
|
|
|
if args.inplace: |
761
|
|
|
shutil.copy(f, f + '~') |
762
|
|
|
|
763
|
|
|
selection = select_pdb_fragment(args.extract) |
764
|
|
|
s = RNAStructure(f) |
765
|
|
|
|
766
|
|
|
output = '' |
767
|
|
|
if not args.no_hr: |
768
|
|
|
output += add_header(version) + '\n' |
769
|
|
|
output += 'HEADER extract ' + args.extract + '\n' # ' '.join(str(selection)) |
770
|
|
|
for l in s.lines: |
771
|
|
|
if l.startswith('ATOM'): |
772
|
|
|
chain = l[21] |
773
|
|
|
resi = int(l[23:26].strip()) |
774
|
|
|
if chain in selection: |
775
|
|
|
if resi in selection[chain]: |
776
|
|
|
# continue # print chain, resi |
777
|
|
|
output += l + '\n' |
778
|
|
|
|
779
|
|
|
# write: inplace |
780
|
|
|
if args.inplace: |
781
|
|
|
with open(f, 'w') as f: |
782
|
|
|
f.write(output) |
783
|
|
|
elif args.here: |
784
|
|
|
if '_extr' not in f: # good idea? |
785
|
|
|
nf = f.replace('.pdb', '_extr.pdb') |
786
|
|
|
with open(nf, 'w') as fio: |
787
|
|
|
print(nf) |
788
|
|
|
fio.write(output) |
789
|
|
|
else: # write: to stdout |
790
|
|
|
try: |
791
|
|
|
sys.stdout.write(output) |
792
|
|
|
sys.stdout.flush() |
793
|
|
|
except IOError: |
794
|
|
|
pass |
795
|
|
|
|
796
|
|
|
|
797
|
|
|
if args.extract_chain: |
798
|
|
|
# quick fix - make a list on the spot |
799
|
|
|
if list != type(args.file): |
800
|
|
|
args.file = [args.file] |
801
|
|
|
################################## |
802
|
|
|
for f in args.file: |
803
|
|
|
if args.inplace: |
804
|
|
|
shutil.copy(f, f + '~') |
805
|
|
|
|
806
|
|
|
selection = select_pdb_fragment(args.extract) |
807
|
|
|
s = RNAStructure(f) |
808
|
|
|
|
809
|
|
|
output = '' |
810
|
|
|
if not args.no_hr: |
811
|
|
|
output += add_header(version) + '\n' |
812
|
|
|
output += 'HEADER extract ' + args.extract + '\n' # ' '.join(str(selection)) |
813
|
|
|
for l in s.lines: |
814
|
|
|
if l.startswith('ATOM') or l.startswith('TER') or l.startswith('HETATM'): |
815
|
|
|
chain = l[21] |
816
|
|
|
if chain in args.extract_chain: |
817
|
|
|
output += l + '\n' |
818
|
|
|
|
819
|
|
|
# write: inplace |
820
|
|
|
if args.inplace: |
821
|
|
|
with open(f, 'w') as f: |
822
|
|
|
f.write(output) |
823
|
|
|
else: # write: to stdout |
824
|
|
|
try: |
825
|
|
|
sys.stdout.write(output) |
826
|
|
|
sys.stdout.flush() |
827
|
|
|
except IOError: |
828
|
|
|
pass |
829
|
|
|
|
830
|
|
|
if args.is_pdb: |
831
|
|
|
s = RNAStructure(args.file) |
832
|
|
|
output = str(s.is_pdb()) |
833
|
|
|
sys.stdout.write(output + '\n') |
834
|
|
|
|
835
|
|
|
if args.un_nmr: |
836
|
|
|
s = RNAStructure(args.file) |
837
|
|
|
str(s.un_nmr()) |
838
|
|
|
|
839
|
|
|
if args.is_nmr: |
840
|
|
|
struc = RNAStructure(args.file) |
841
|
|
|
output = str(struc.is_nmr(args.verbose)) |
842
|
|
|
sys.stdout.write(output + '\n') |
843
|
|
|
|
844
|
|
|
#edit |
845
|
|
|
if args.edit: |
846
|
|
|
if list != type(args.file): |
847
|
|
|
args.file = [args.file] |
848
|
|
|
################################## |
849
|
|
|
for f in args.file: |
850
|
|
|
if args.verbose: |
851
|
|
|
print(f) |
852
|
|
|
if args.inplace: |
853
|
|
|
shutil.copy(f, f + '~') |
854
|
|
|
|
855
|
|
|
output = edit_pdb(f, args) |
856
|
|
|
|
857
|
|
|
if args.inplace: |
858
|
|
|
with open(f, 'w') as f: |
859
|
|
|
f.write(output) |
860
|
|
|
else: # write: to stdout |
861
|
|
|
try: |
862
|
|
|
sys.stdout.write(output) |
863
|
|
|
sys.stdout.flush() |
864
|
|
|
except IOError: |
865
|
|
|
pass |
866
|
|
|
|
867
|
|
|
|
868
|
|
|
if args.fetch: |
869
|
|
|
if list != type(args.file): |
870
|
|
|
args.file = [args.file] |
871
|
|
|
################################## |
872
|
|
|
for f in args.file: |
873
|
|
|
fn = fetch(f) |
874
|
|
|
|
875
|
|
|
if args.fetch_fasta: |
876
|
|
|
|
877
|
|
|
|
878
|
|
|
pdb_id = args.file |
879
|
|
|
pdb_id = pdb_id.replace('.pdb', '') |
880
|
|
|
|
881
|
|
|
import urllib3 |
882
|
|
|
http = urllib3.PoolManager() |
883
|
|
|
|
884
|
|
|
response = http.request('GET', 'https://www.rcsb.org/fasta/entry/' + pdb_id.upper()) |
885
|
|
|
if not response.status == 200: |
886
|
|
|
raise PDBFetchError() |
|
|
|
|
887
|
|
|
else: |
888
|
|
|
txt = response.data.decode("utf-8").strip() |
889
|
|
|
print(txt) |
890
|
|
|
if 'No valid PDB IDs were submitted' in txt: |
891
|
|
|
pass |
892
|
|
|
else: |
893
|
|
|
with open(pdb_id + '.fa', 'w') as f: |
894
|
|
|
f.write(txt) |
895
|
|
|
|
896
|
|
|
|
897
|
|
|
if args.fetch_chain: |
898
|
|
|
fn = fetch(args.file[0]) |
899
|
|
|
chain = args.file[1] |
900
|
|
|
nfn = fn.replace('.pdb','') + '_' + chain + '.pdb' # 6bk8_H.pdb |
901
|
|
|
#cmd = 'rna_pdb_tools.py --get-chain %s %s' > %s' % (chain, fn, nfn) |
902
|
|
|
cmd = 'rna_pdb_tools.py --get-chain %s %s' % (chain, fn) |
903
|
|
|
os.system(cmd) |
904
|
|
|
# print(nfn) |
905
|
|
|
|
906
|
|
|
if args.fetch_ba: |
907
|
|
|
fetch_ba(args.file) |
908
|
|
|
|
909
|
|
|
if args.fetch_header: |
910
|
|
|
|
911
|
|
|
import requests |
912
|
|
|
if list != type(args.file): |
913
|
|
|
args.file = [args.file] |
914
|
|
|
################################## |
915
|
|
|
|
916
|
|
|
|
917
|
|
|
def get_pdb_header(pdb_id): |
918
|
|
|
url = f"https://files.rcsb.org/download/{pdb_id}.pdb" |
919
|
|
|
response = requests.get(url) |
920
|
|
|
response.raise_for_status() # Check if the request was successful |
921
|
|
|
|
922
|
|
|
header_lines = [] |
923
|
|
|
for line in response.text.splitlines(): |
924
|
|
|
if line.startswith("TITLE"): #or line.startswith("COMPND") or line.startswith("SOURCE"): |
925
|
|
|
header_lines.append(line) |
926
|
|
|
elif line.startswith("ATOM"): # Stop once ATOM records begin |
927
|
|
|
break |
928
|
|
|
|
929
|
|
|
return " ".join([l.replace('TITLE', '').strip() for l in header_lines]) |
930
|
|
|
|
931
|
|
|
for f in args.file: |
932
|
|
|
f = f.replace('.pdb', '').replace('.cif', '') |
933
|
|
|
pdb_id = f.split('_')[0] # _A.cif |
934
|
|
|
header = get_pdb_header(pdb_id) |
935
|
|
|
print(f'{pdb_id} {header}') |
936
|
|
|
|
937
|
|
|
if args.fetch_cif: |
938
|
|
|
if list != type(args.file): |
939
|
|
|
args.file = [args.file] |
940
|
|
|
################################## |
941
|
|
|
for f in args.file: |
942
|
|
|
fetch_cif(f) |
943
|
|
|
|
944
|
|
|
if args.collapsed_view or args.cv: |
945
|
|
|
collapsed_view(args) |
946
|
|
|
|
947
|
|
|
if args.delete_anisou: |
948
|
|
|
# quick fix - make a list on the spot |
949
|
|
|
if list != type(args.file): |
950
|
|
|
args.file = [args.file] |
951
|
|
|
################################## |
952
|
|
|
for f in args.file: |
953
|
|
|
if args.inplace: |
954
|
|
|
shutil.copy(f, f + '~') |
955
|
|
|
|
956
|
|
|
s = RNAStructure(f) |
957
|
|
|
|
958
|
|
|
output = '' |
959
|
|
|
if not args.no_hr: |
960
|
|
|
output += add_header(version) + '\n' |
961
|
|
|
|
962
|
|
|
for l in s.lines: |
963
|
|
|
if l.startswith('ANISOU'): |
964
|
|
|
continue |
965
|
|
|
else: |
966
|
|
|
output += l + '\n' |
967
|
|
|
|
968
|
|
|
if args.inplace: |
969
|
|
|
with open(f, 'w') as f: |
970
|
|
|
f.write(output) |
971
|
|
|
else: # write: to stdout |
972
|
|
|
try: |
973
|
|
|
sys.stdout.write(output) |
974
|
|
|
sys.stdout.flush() |
975
|
|
|
except IOError: |
976
|
|
|
pass |
977
|
|
|
|
978
|
|
|
|
979
|
|
|
if args.swap_chains: |
980
|
|
|
# quick fix - make a list on the spot |
981
|
|
|
if list != type(args.file): |
982
|
|
|
args.file = [args.file] |
983
|
|
|
################################## |
984
|
|
|
for f in args.file: |
985
|
|
|
if args.inplace: |
986
|
|
|
shutil.copy(f, f + '~') |
987
|
|
|
# rename_chain 'A>B' |
988
|
|
|
s = RNAStructure(f) |
989
|
|
|
output = '' |
990
|
|
|
if not args.no_hr: |
991
|
|
|
output += add_header(version) + '\n' |
992
|
|
|
|
993
|
|
|
chain_id_old, chain_id_new = args.swap_chains.split('>') |
994
|
|
|
if chain_id_old == chain_id_new: |
995
|
|
|
output = open(f).read() # return the file ;-) itself unchanged |
996
|
|
|
else: |
997
|
|
|
s.rename_chain(chain_id_new, '_') |
998
|
|
|
s.rename_chain(chain_id_old, chain_id_new) |
999
|
|
|
output += s.rename_chain('_', chain_id_old) |
1000
|
|
|
|
1001
|
|
|
if args.inplace: |
1002
|
|
|
with open(f, 'w') as f: |
1003
|
|
|
f.write(output) |
1004
|
|
|
else: # write: to stdout |
1005
|
|
|
try: |
1006
|
|
|
sys.stdout.write(output) |
1007
|
|
|
sys.stdout.flush() |
1008
|
|
|
except IOError: |
1009
|
|
|
pass |
1010
|
|
|
|
1011
|
|
|
if args.rename_chain: |
1012
|
|
|
# quick fix - make a list on the spot |
1013
|
|
|
if list != type(args.file): |
1014
|
|
|
args.file = [args.file] |
1015
|
|
|
################################## |
1016
|
|
|
for f in args.file: |
1017
|
|
|
if args.inplace: |
1018
|
|
|
shutil.copy(f, f + '~') |
1019
|
|
|
# rename_chain 'A>B' |
1020
|
|
|
s = RNAStructure(f) |
1021
|
|
|
chain_id_old, chain_id_new = args.rename_chain.split('>') |
1022
|
|
|
output = '' |
1023
|
|
|
if not args.no_hr: |
1024
|
|
|
output += add_header(version) + '\n' |
1025
|
|
|
output += s.rename_chain(chain_id_old, chain_id_new) |
1026
|
|
|
if args.inplace: |
1027
|
|
|
with open(f, 'w') as f: |
1028
|
|
|
f.write(output) |
1029
|
|
|
else: # write: to stdout |
1030
|
|
|
try: |
1031
|
|
|
sys.stdout.write(output) |
1032
|
|
|
sys.stdout.flush() |
1033
|
|
|
except IOError: |
1034
|
|
|
pass |
1035
|
|
|
|
1036
|
|
|
|
1037
|
|
|
if args.split_alt_locations: |
1038
|
|
|
# quick fix - make a list on the spot |
1039
|
|
|
if list != type(args.file): |
1040
|
|
|
args.file = [args.file] |
1041
|
|
|
################################## |
1042
|
|
|
for f in args.file: |
1043
|
|
|
if args.inplace: |
1044
|
|
|
shutil.copy(f, f + '~') |
1045
|
|
|
|
1046
|
|
|
#s = RNAStructure(f) |
1047
|
|
|
s = open(f) |
1048
|
|
|
output = '' |
1049
|
|
|
if not args.no_hr: |
1050
|
|
|
output += add_header(version) + '\n' |
1051
|
|
|
|
1052
|
|
|
# First, collect all alternative locations. |
1053
|
|
|
alts = set() |
1054
|
|
|
for l in s: |
1055
|
|
|
if l.startswith('ATOM'): |
1056
|
|
|
if l[16].strip(): |
1057
|
|
|
alts.add(l[16]) |
1058
|
|
|
s.close() |
1059
|
|
|
|
1060
|
|
|
if args.verbose: |
1061
|
|
|
print('alts:', alts) |
1062
|
|
|
|
1063
|
|
|
for index, alt in enumerate(alts): |
1064
|
|
|
output += 'MODEL %i' % (index + 1) |
1065
|
|
|
s = open(f) |
1066
|
|
|
for l in s: |
1067
|
|
|
if l.startswith('ATOM') or l.startswith('HETATM'): |
1068
|
|
|
# if empty, then print this line |
1069
|
|
|
if l[16] == ' ': |
1070
|
|
|
output += l |
1071
|
|
|
if l[16] == alt: |
1072
|
|
|
output += l |
1073
|
|
|
else: |
1074
|
|
|
output += l |
1075
|
|
|
output += 'ENDMDL\n' |
1076
|
|
|
s.close() |
1077
|
|
|
|
1078
|
|
|
if args.inplace: |
1079
|
|
|
with open(f, 'w') as f: |
1080
|
|
|
f.write(output) |
1081
|
|
|
else: # write: to stdout |
1082
|
|
|
try: |
1083
|
|
|
sys.stdout.write(output) |
1084
|
|
|
sys.stdout.flush() |
1085
|
|
|
except IOError: |
1086
|
|
|
pass |
1087
|
|
|
|
1088
|
|
|
if args.orgmode: |
1089
|
|
|
if args.inplace: |
1090
|
|
|
shutil.copy(args.file, args.file + '~') |
1091
|
|
|
s = RNAStructure(args.file) |
1092
|
|
|
s.decap_gtp() |
1093
|
|
|
s.std_resn() |
1094
|
|
|
s.remove_hydrogen() |
1095
|
|
|
s.remove_ion() |
1096
|
|
|
s.remove_water() |
1097
|
|
|
s.fix_op_atoms() |
1098
|
|
|
s.renum_atoms() |
1099
|
|
|
s.shift_atom_names() |
1100
|
|
|
s.prune_elements() |
1101
|
|
|
# print s.get_preview() |
1102
|
|
|
# s.write(args.outfile) |
1103
|
|
|
# for l in s.lines: |
1104
|
|
|
# print l |
1105
|
|
|
|
1106
|
|
|
remarks = s.get_rnapuzzle_ready( |
1107
|
|
|
args.renumber_residues, fix_missing_atoms=True, rename_chains=True, verbose=args.verbose) |
1108
|
|
|
|
1109
|
|
|
with open(args.file + '~', 'w') as f: |
1110
|
|
|
if not args.no_hr: |
1111
|
|
|
f.write(add_header(version) + '\n') |
1112
|
|
|
|
1113
|
|
|
f.write('\n'.join(remarks) + '\n') |
1114
|
|
|
f.write(s.get_text()) |
1115
|
|
|
|
1116
|
|
|
try: |
1117
|
|
|
from Bio import PDB |
1118
|
|
|
from Bio.PDB import PDBIO |
1119
|
|
|
import warnings |
1120
|
|
|
warnings.filterwarnings('ignore', '.*Invalid or missing.*',) |
1121
|
|
|
warnings.filterwarnings('ignore', '.*with given element *',) |
1122
|
|
|
except: |
1123
|
|
|
sys.exit('Error: Install biopython to use this function (pip biopython)') |
1124
|
|
|
|
1125
|
|
|
parser = PDB.PDBParser() |
1126
|
|
|
struct = parser.get_structure('', args.file + '~') |
1127
|
|
|
model = struct[0] |
1128
|
|
|
|
1129
|
|
|
# chains [['A', 'seq', [residues]]] |
1130
|
|
|
chains = [] |
1131
|
|
|
for c in model.get_list(): |
1132
|
|
|
seq = '' |
1133
|
|
|
chain = [] |
1134
|
|
|
for r in c: |
1135
|
|
|
chain.append(str(r.get_resname().strip()) + str(r.id[1])) |
1136
|
|
|
seq += r.get_resname().strip() |
1137
|
|
|
chains.append([c.id, seq, chain]) |
1138
|
|
|
|
1139
|
|
|
t = [] |
1140
|
|
|
#[['A', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']], ['B', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']]] |
1141
|
|
|
for c in chains: |
1142
|
|
|
t.append('* ' + c[0] + ':' + c[2][0][1:] + '-' + c[2][-1][1:] + ' ' + c[1]) |
1143
|
|
|
for r in c[2]: |
1144
|
|
|
t.append('** ' + c[0] + ':' + r) |
1145
|
|
|
print('\n'.join(t)) |
1146
|
|
|
|
1147
|
|
|
if args.fix: |
1148
|
|
|
# quick fix - make a list on the spot |
1149
|
|
|
if list != type(args.file): |
1150
|
|
|
args.file = [args.file] |
1151
|
|
|
################################## |
1152
|
|
|
for f in args.file: |
1153
|
|
|
cmd = 'pdbfixer ' + f + ' --add-atoms all --add-residues' |
1154
|
|
|
print(cmd) |
1155
|
|
|
os.system(cmd) |
1156
|
|
|
if args.inplace: |
1157
|
|
|
shutil.move("output.pdb", f) |
1158
|
|
|
else: |
1159
|
|
|
shutil.move("output.pdb", f.replace('.pdb', '_fx.pdb')) |
1160
|
|
|
|
1161
|
|
|
if args.to_mol2: |
1162
|
|
|
# quick fix - make a list on the spot |
1163
|
|
|
if list != type(args.file): |
1164
|
|
|
args.file = [args.file] |
1165
|
|
|
################################## |
1166
|
|
|
for f in args.file: |
1167
|
|
|
cmd = 'obabel -i pdb ' + f + ' -o mol2 -O ' + f.replace('.pdb', '.mol2') |
1168
|
|
|
print(cmd) |
1169
|
|
|
os.system(cmd) |
1170
|
|
|
|
1171
|
|
|
|
1172
|
|
|
if args.nmr_dir: |
1173
|
|
|
files = sort_strings(glob.glob(args.nmr_dir + '/' + args.file)) |
1174
|
|
|
|
1175
|
|
|
c = 1 |
1176
|
|
|
|
1177
|
|
|
for f in files: |
1178
|
|
|
#s = RNAStructure(f) |
1179
|
|
|
print("MODEL " + str(c)) |
1180
|
|
|
# at some point I could use RNAStructure for this |
1181
|
|
|
print(open(f).read()) |
1182
|
|
|
#print(s.get_text(add_end=False)) |
1183
|
|
|
print('ENDMDL') |
1184
|
|
|
c += 1 |
1185
|
|
|
print('END') |
1186
|
|
|
|
1187
|
|
|
if args.set_chain: |
1188
|
|
|
if list != type(args.file): |
1189
|
|
|
args.file = [args.file] |
1190
|
|
|
|
1191
|
|
|
for f in args.file: |
1192
|
|
|
txt = set_chain_for_struc(f, args.set_chain) |
1193
|
|
|
if args.inplace: |
1194
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
1195
|
|
|
with open(f, 'w') as fi: |
1196
|
|
|
fi.write(txt) |
1197
|
|
|
else: |
1198
|
|
|
print(txt) |
1199
|
|
|
|
1200
|
|
|
if args.remove0: |
1201
|
|
|
if list != type(args.file): |
1202
|
|
|
args.file = [args.file] |
1203
|
|
|
|
1204
|
|
|
for f in args.file: |
1205
|
|
|
s = RNAStructure(f) |
1206
|
|
|
txt = '' |
1207
|
|
|
for l in s.lines: |
1208
|
|
|
if s.get_atom_coords(l) == (0.0, 0.0, 0.0): |
1209
|
|
|
continue |
1210
|
|
|
else: |
1211
|
|
|
txt += l + '\n' |
1212
|
|
|
|
1213
|
|
|
if args.inplace: |
1214
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
1215
|
|
|
with open(f, 'w') as fi: |
1216
|
|
|
fi.write(txt) |
1217
|
|
|
else: |
1218
|
|
|
print(txt) |
1219
|
|
|
|
1220
|
|
|
if args.replace_htm: |
1221
|
|
|
if list != type(args.file): |
1222
|
|
|
args.file = [args.file] |
1223
|
|
|
|
1224
|
|
|
for f in args.file: |
1225
|
|
|
with open(f) as fn: |
1226
|
|
|
txt = fn.read() |
1227
|
|
|
txt = txt.replace('HETATM', 'ATOM ') |
1228
|
|
|
if args.inplace: |
1229
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
1230
|
|
|
with open(f, 'w') as fi: |
1231
|
|
|
fi.write(txt) |
1232
|
|
|
else: |
1233
|
|
|
print(txt) |
1234
|
|
|
|
1235
|
|
|
if args.rgyration: |
1236
|
|
|
if list != type(args.file): |
1237
|
|
|
args.file = [args.file] |
1238
|
|
|
# quick fix - make a list on the spot |
1239
|
|
|
if list != type(args.file): |
1240
|
|
|
args.file = [args.file] |
1241
|
|
|
################################## |
1242
|
|
|
analyzed = [] |
1243
|
|
|
for f in args.file: |
1244
|
|
|
##################################### |
1245
|
|
|
if args.uniq: |
1246
|
|
|
subname = eval('f' + args.uniq) |
1247
|
|
|
if subname in analyzed: |
1248
|
|
|
continue |
1249
|
|
|
else: |
1250
|
|
|
analyzed.append(subname) |
1251
|
|
|
|
1252
|
|
|
s = RNAStructure(f) |
1253
|
|
|
if not s.is_pdb(): |
1254
|
|
|
print('Error: Not a PDB file %s' % f) |
1255
|
|
|
sys.exit(1) |
1256
|
|
|
s.decap_gtp() |
1257
|
|
|
s.std_resn() |
1258
|
|
|
s.remove_hydrogen() |
1259
|
|
|
s.remove_ion() |
1260
|
|
|
s.remove_water() |
1261
|
|
|
if args.renum_atoms: |
1262
|
|
|
s.renum_atoms() |
1263
|
|
|
s.fix_O_in_UC() |
1264
|
|
|
s.fix_op_atoms() |
1265
|
|
|
|
1266
|
|
|
output = '' |
1267
|
|
|
|
1268
|
|
|
# with # is easier to grep this out |
1269
|
|
|
if args.fasta: |
1270
|
|
|
# s.fn vs s.name |
1271
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n' |
1272
|
|
|
elif args.oneline: |
1273
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n' |
1274
|
|
|
else: |
1275
|
|
|
N = len(s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq)) |
1276
|
|
|
output += '' + os.path.basename(f.replace('.pdb', '')).ljust(60) + ' seq len: ' + str(N) + ' ' |
1277
|
|
|
try: |
1278
|
|
|
sys.stdout.write(output) |
1279
|
|
|
sys.stdout.flush() |
1280
|
|
|
except IOError: |
1281
|
|
|
pass |
1282
|
|
|
|
1283
|
|
|
import math |
1284
|
|
|
def calc_rg(): |
1285
|
|
|
"""modified after https://github.com/sarisabban/Rg""" |
1286
|
|
|
coord = list() |
1287
|
|
|
mass = list() |
1288
|
|
|
for line in s.lines: |
1289
|
|
|
try: |
1290
|
|
|
line = line.split() |
1291
|
|
|
x = float(line[6]) |
1292
|
|
|
y = float(line[7]) |
1293
|
|
|
z = float(line[8]) |
1294
|
|
|
coord.append([x, y, z]) |
1295
|
|
|
atom = line[2][0] |
1296
|
|
|
if atom == 'C': |
1297
|
|
|
mass.append(12.0107) |
1298
|
|
|
elif atom == 'O': |
1299
|
|
|
mass.append(15.9994) |
1300
|
|
|
elif atom == 'N': |
1301
|
|
|
mass.append(14.0067) |
1302
|
|
|
elif atom == 'S': |
1303
|
|
|
mass.append(32.065) |
1304
|
|
|
elif atom == 'P': |
1305
|
|
|
mass.append(30.97) |
1306
|
|
|
except: |
1307
|
|
|
pass |
1308
|
|
|
|
1309
|
|
|
xm = [(m*i, m*j, m*k) for (i, j, k), m in zip(coord, mass)] |
1310
|
|
|
tmass = sum(mass) |
1311
|
|
|
rr = sum(mi*i + mj*j + mk*k for (i, j, k), (mi, mj, mk) in zip(coord, xm)) |
1312
|
|
|
mm = sum((sum(i) / tmass)**2 for i in zip(*xm)) |
1313
|
|
|
rg = math.sqrt(rr / tmass-mm) |
1314
|
|
|
return(round(rg, 3)) |
1315
|
|
|
|
1316
|
|
|
def radius_of_gyration(N): |
1317
|
|
|
return round(5.5 * (N ** (1/3)), 2) |
1318
|
|
|
|
1319
|
|
|
rg = calc_rg() |
1320
|
|
|
rgn = radius_of_gyration(N) |
|
|
|
|
1321
|
|
|
print(f'radius of gyration: {rg}; expected for this seqlen ({N}): {rgn}, {round(rg / rgn * 100, 2)}%') |
1322
|
|
|
|
1323
|
|
|
if args.renum_nmr: |
1324
|
|
|
if list != type(args.file): |
1325
|
|
|
args.file = [args.file] |
1326
|
|
|
txt = '' |
1327
|
|
|
for f in args.file: |
1328
|
|
|
c = 1 |
1329
|
|
|
for l in open(f): |
1330
|
|
|
if l.startswith('MODEL'): |
1331
|
|
|
txt += "MODEL " + str(c) + '\n' |
1332
|
|
|
c += 1 |
1333
|
|
|
elif l.strip() == 'END': |
1334
|
|
|
pass |
1335
|
|
|
else: |
1336
|
|
|
txt += l |
1337
|
|
|
if args.inplace: |
1338
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
|
|
|
|
1339
|
|
|
with open(f) as fi: |
1340
|
|
|
fi.write(txt) |
1341
|
|
|
else: |
1342
|
|
|
print(txt) |
1343
|
|
|
|
1344
|
|
|
from rna_tools.rna_tools_config import PYMOL_PATH |
1345
|
|
|
sys.path.insert(0, PYMOL_PATH) |
1346
|
|
View Code Duplication |
if args.cif2pdb: |
|
|
|
|
1347
|
|
|
# quick fix - make a list on the spot |
1348
|
|
|
if list != type(args.file): |
1349
|
|
|
args.file = [args.file] |
1350
|
|
|
################################## |
1351
|
|
|
for cif_file in args.file: |
1352
|
|
|
from Bio.PDB import MMCIFParser, PDBIO |
1353
|
|
|
parser = MMCIFParser() |
1354
|
|
|
structure = parser.get_structure("structure_id", cif_file) |
1355
|
|
|
pdb_file = cif_file.replace('.cif', '_fCIF.pdb') |
1356
|
|
|
|
1357
|
|
|
|
1358
|
|
|
try: |
1359
|
|
|
# Save to PDB format |
1360
|
|
|
io = PDBIO() |
1361
|
|
|
io.set_structure(structure) |
1362
|
|
|
io.save(pdb_file) |
1363
|
|
|
|
1364
|
|
|
print(f'saved: {pdb_file}') |
1365
|
|
|
# open a file add remarks |
1366
|
|
|
new_file = '' |
1367
|
|
|
with open(pdb_file, 'r') as f: |
1368
|
|
|
if not args.no_hr: |
1369
|
|
|
new_file += add_header(version) + '\n' |
1370
|
|
|
new_file += f.read() |
1371
|
|
|
|
1372
|
|
|
with open(pdb_file, 'w') as f: |
1373
|
|
|
f.write(new_file) |
1374
|
|
|
|
1375
|
|
|
except: |
1376
|
|
|
print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.') |
1377
|
|
|
|
1378
|
|
|
def has_high_rna_content(chain, threshold=0.8): |
1379
|
|
|
# RNA nucleotides: A, C, G, U, and X (you can modify as needed) |
1380
|
|
|
rna_nucleotides = ['A', 'C', 'G', 'U', 'X'] |
1381
|
|
|
total_residues = 0 |
1382
|
|
|
rna_residues = 0 |
1383
|
|
|
|
1384
|
|
|
# Count the total number of residues and RNA-like residues |
1385
|
|
|
for residue in chain: |
1386
|
|
|
total_residues += 1 |
1387
|
|
|
if residue.get_resname().strip() in rna_nucleotides: |
1388
|
|
|
rna_residues += 1 |
1389
|
|
|
|
1390
|
|
|
# Calculate the proportion of RNA residues |
1391
|
|
|
if total_residues == 0: |
1392
|
|
|
return False # Avoid division by zero if chain has no residues |
1393
|
|
|
|
1394
|
|
|
rna_percentage = rna_residues / total_residues |
1395
|
|
|
|
1396
|
|
|
# Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default) |
1397
|
|
|
return rna_percentage >= threshold |
1398
|
|
|
|
1399
|
|
|
from Bio.PDB.MMCIFParser import MMCIFParser |
1400
|
|
|
from Bio.PDB import MMCIFParser, Structure, Model, Chain |
1401
|
|
|
|
1402
|
|
|
# Initialize the parser |
1403
|
|
|
parser = MMCIFParser() |
1404
|
|
|
|
1405
|
|
|
# Parse the structure |
1406
|
|
|
structure = parser.get_structure("structure", cif_file) |
1407
|
|
|
|
1408
|
|
|
# Create a list of single-letter chain identifiers |
1409
|
|
|
import string |
1410
|
|
|
letters = list(string.ascii_uppercase) |
1411
|
|
|
|
1412
|
|
|
for model in structure: |
1413
|
|
|
for chain in model: |
1414
|
|
|
if has_high_rna_content(chain): |
1415
|
|
|
# New structure |
1416
|
|
|
new_structure = Structure.Structure("new_structure") |
1417
|
|
|
new_model = Model.Model(0) # Create a new model |
1418
|
|
|
new_structure.add(new_model) # Add the new model to the new structure |
1419
|
|
|
|
1420
|
|
|
chain_id_new = letters.pop(0) |
1421
|
|
|
chain_id = chain.get_id() |
1422
|
|
|
|
1423
|
|
|
atom_count = 0 |
1424
|
|
|
for residue in chain: |
1425
|
|
|
for atom in residue: |
1426
|
|
|
atom_count += 1 |
1427
|
|
|
|
1428
|
|
|
remarks = [] |
1429
|
|
|
remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}') |
1430
|
|
|
|
1431
|
|
|
pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') |
1432
|
|
|
print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}') |
1433
|
|
|
|
1434
|
|
|
chain.id = chain_id_new |
1435
|
|
|
new_model.add(chain) |
1436
|
|
|
|
1437
|
|
|
io = PDBIO() |
1438
|
|
|
io.set_structure(new_structure) |
1439
|
|
|
|
1440
|
|
|
io.save(pdb_file) |
1441
|
|
|
# open a file add remarks |
1442
|
|
|
new_file = '' |
1443
|
|
|
with open(pdb_file, 'r') as f: |
1444
|
|
|
if not args.no_hr: |
1445
|
|
|
new_file += add_header(version) + '\n' |
1446
|
|
|
if remarks: |
1447
|
|
|
new_file += '\n'.join(remarks) + '\n' |
1448
|
|
|
new_file += f.read() |
1449
|
|
|
|
1450
|
|
|
with open(pdb_file, 'w') as f: |
1451
|
|
|
f.write(new_file) |
1452
|
|
|
|
1453
|
|
|
if args.pdb2cif: |
1454
|
|
|
try: |
1455
|
|
|
from pymol import cmd |
1456
|
|
|
except ImportError: |
1457
|
|
|
print('This functionality needs PyMOL. Install it or if installed, check your setup') |
1458
|
|
|
sys.exit(1) |
1459
|
|
|
|
1460
|
|
|
# quick fix - make a list on the spot |
1461
|
|
|
if list != type(args.file): |
1462
|
|
|
args.file = [args.file] |
1463
|
|
|
################################## |
1464
|
|
|
for f in args.file: |
1465
|
|
|
cmd.load(f) |
1466
|
|
|
fo = f.replace('.pdb', '.cif') |
1467
|
|
|
cmd.save(fo, '(all)') |
1468
|
|
|
cmd.delete('all') |
1469
|
|
|
print(fo, 'saved') |
1470
|
|
|
|