|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
# -*- coding: utf-8 -*- |
|
3
|
|
|
|
|
4
|
|
|
"""rna_pdb_tools - a swiss army knife to manipulation of RNA pdb structures |
|
5
|
|
|
|
|
6
|
|
|
Usage:: |
|
7
|
|
|
|
|
8
|
|
|
$ rna_pdb_tools.py --delete A:46-56 --inplace *.pdb |
|
9
|
|
|
|
|
10
|
|
|
$ rna_pdb_tools.py --get-seq * |
|
11
|
|
|
# BujnickiLab_RNApuzzle14_n01bound |
|
12
|
|
|
> A:1-61 |
|
13
|
|
|
# BujnickiLab_RNApuzzle14_n02bound |
|
14
|
|
|
> A:1-61 |
|
15
|
|
|
CGUUAGCCCAGGAAACUGGGCGGAAGUAAGGCCCAUUGCACUCCGGGCCUGAAGCAACGCG |
|
16
|
|
|
[...] |
|
17
|
|
|
|
|
18
|
|
|
See `rna_pdb_merge_into_one.py` to merge PDB files in the order as you like into one NMR-like (multimodel) file |
|
19
|
|
|
|
|
20
|
|
|
Examples:: |
|
21
|
|
|
|
|
22
|
|
|
rna_pdb_tools.py --backbone-only --get-rnapuzzle-ready --inplace --suffix=bo examples/4GXY_min.pdb |
|
23
|
|
|
|
|
24
|
|
|
To extract specific atoms for each residue and write them to separate PDB file (next to the input files, following syntax "<input>_<resid>.pdb"):: |
|
25
|
|
|
|
|
26
|
|
|
rna_pdb_tools.py --rpr input/4GXY_min.pdb --save-single-res --ref-frame-only |
|
27
|
|
|
|
|
28
|
|
|
Atoms presets:: |
|
29
|
|
|
|
|
30
|
|
|
--backbone-only used only with --get-rnapuzzle-ready, keep only backbone (= remove bases) |
|
31
|
|
|
--ref-frame-only used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P |
|
32
|
|
|
--no-backbone used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5') |
|
33
|
|
|
--bases-only used only with --get-rnapuzzle-ready, keep only atoms of bases |
|
34
|
|
|
|
|
35
|
|
|
.. image:: ../pngs/276411138-236435ff-2944-4bec-ab75-dca0d1e3aacf.jpg |
|
36
|
|
|
|
|
37
|
|
|
|
|
38
|
|
|
-v is for verbose, --version for version ;) |
|
39
|
|
|
""" |
|
40
|
|
|
import argparse |
|
41
|
|
|
import textwrap |
|
42
|
|
|
import os |
|
43
|
|
|
import shutil |
|
44
|
|
|
import sys |
|
45
|
|
|
import tempfile |
|
46
|
|
|
import glob |
|
47
|
|
|
import os |
|
48
|
|
|
from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version, \ |
|
49
|
|
|
collapsed_view, fetch, fetch_ba, fetch_cif, replace_chain, RNAStructure, \ |
|
50
|
|
|
select_pdb_fragment, sort_strings, set_chain_for_struc |
|
51
|
|
|
from rna_tools.tools.rna_x3dna.rna_x3dna import x3DNA |
|
52
|
|
|
|
|
53
|
|
|
|
|
54
|
|
|
def get_parser(): |
|
55
|
|
|
version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__) |
|
56
|
|
|
version = version[1].strip() |
|
57
|
|
|
parser = argparse.ArgumentParser( |
|
58
|
|
|
description=__doc__, formatter_class=argparse.RawTextHelpFormatter) |
|
59
|
|
|
|
|
60
|
|
|
parser.add_argument('--version', help='', action='version', version=version) |
|
61
|
|
|
|
|
62
|
|
|
parser.add_argument('-r', '--report', help='get report', |
|
63
|
|
|
action='store_true') |
|
64
|
|
|
|
|
65
|
|
|
parser.add_argument('--no-progress-bar', help='for --no-progress-bar for --rpr', action='store_true') |
|
66
|
|
|
|
|
67
|
|
|
parser.add_argument('--renum-atoms', help='renumber atoms, tested with --get-seq', |
|
68
|
|
|
action='store_true') |
|
69
|
|
|
|
|
70
|
|
|
parser.add_argument('--renum-nmr', help='', |
|
71
|
|
|
action='store_true') |
|
72
|
|
|
|
|
73
|
|
|
parser.add_argument('--renum-residues-dirty', help='', action='store_true') |
|
74
|
|
|
|
|
75
|
|
|
parser.add_argument('--undo', help='undo operation of action done --inplace, , rename "backup files" .pdb~ to pdb, ALL files in the folder, not only ~ related to the last action (that you might want to revert, so be careful)', action='store_true') |
|
76
|
|
|
|
|
77
|
|
|
parser.add_argument('--delete-anisou', help='remove files with ANISOU records, works with --inplace', |
|
78
|
|
|
action='store_true') |
|
79
|
|
|
|
|
80
|
|
|
parser.add_argument('--remove0', help='remove atoms of X=0 position', |
|
81
|
|
|
action='store_true') |
|
82
|
|
|
|
|
83
|
|
|
parser.add_argument('--fix', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms', |
|
84
|
|
|
action='store_true') |
|
85
|
|
|
|
|
86
|
|
|
parser.add_argument('--to-mol2', help='fix a PDB file, ! external program, pdbfixer used to fix missing atoms', |
|
87
|
|
|
action='store_true') |
|
88
|
|
|
|
|
89
|
|
|
parser.add_argument('--split-alt-locations', help='splits atoms, e.g. for alt locs A and B, it splits atoms two MODELS (all localizations A goes into MODEL1 and all localizations B goes into MODEL2', |
|
90
|
|
|
action='store_true') |
|
91
|
|
|
|
|
92
|
|
|
parser.add_argument('-c', '--clean', help='get clean structure', |
|
93
|
|
|
action='store_true') |
|
94
|
|
|
|
|
95
|
|
|
parser.add_argument('--is-pdb', help='check if a file is in the pdb format', |
|
96
|
|
|
action='store_true') |
|
97
|
|
|
|
|
98
|
|
|
parser.add_argument('--is-nmr', help='check if a file is NMR-style multiple model pdb', |
|
99
|
|
|
action='store_true') |
|
100
|
|
|
|
|
101
|
|
|
parser.add_argument('--nmr-dir', help='make NMR-style multiple model pdb file from a set of files \n\n' + |
|
102
|
|
|
" rna_pdb_tools.py --nmr-dir . 'cwc15_u5_fragments*.pdb' > ~/Desktop/cwc15-u5.pdb\n\n" + |
|
103
|
|
|
"please use '' for pattern file recognition, this is a hack to deal with folders with\n" |
|
104
|
|
|
"thousands of models, if you used only *.pdb then the terminal will complain that you\n" |
|
105
|
|
|
"selected to many files.") |
|
106
|
|
|
|
|
107
|
|
|
parser.add_argument('--un-nmr', help="""split NMR-style multiple model pdb files into individual models [biopython], |
|
108
|
|
|
|
|
109
|
|
|
rna_pdb_tools.py --un-nmr split.pdb |
|
110
|
|
|
2 |
|
111
|
|
|
/Users/magnus/Desktop/3hl2/split_1.pdb |
|
112
|
|
|
/Users/magnus/Desktop/3hl2/split_2.pdb |
|
113
|
|
|
|
|
114
|
|
|
""", |
|
115
|
|
|
action='store_true') |
|
116
|
|
|
|
|
117
|
|
|
parser.add_argument('--orgmode', help='get a structure in org-mode format <sick!>', |
|
118
|
|
|
action='store_true') |
|
119
|
|
|
|
|
120
|
|
|
parser.add_argument('--get-chain', help='get chain, one or many, e.g, A, but now also ABC works') |
|
121
|
|
|
|
|
122
|
|
|
parser.add_argument('--fetch', action='store_true', help='fetch file from the PDB db, e.g., 1xjr,\nuse \'rp\' to fetch, fetch a given join, 4w90:C or 4w90_C' + |
|
123
|
|
|
'the RNA-Puzzles standardized_dataset [around 100 MB]') |
|
124
|
|
|
|
|
125
|
|
|
parser.add_argument('--fetch-cif', action='store_true', help='') |
|
126
|
|
|
|
|
127
|
|
|
parser.add_argument('--fetch-ba', action='store_true', |
|
128
|
|
|
help='fetch biological assembly from the PDB db') |
|
129
|
|
|
|
|
130
|
|
|
parser.add_argument('--fetch-chain', action='store_true', help='fetch a structure in extract chain, e.g. 6bk8 H') |
|
131
|
|
|
|
|
132
|
|
|
parser.add_argument('--fetch-fasta', action='store_true', help='fetch a fasta/sequence for given PDB ID, e.g. 6bk8') |
|
133
|
|
|
|
|
134
|
|
|
parser.add_argument('--get-seq', help='get seq', action='store_true') |
|
135
|
|
|
|
|
136
|
|
|
parser.add_argument('--rgyration', help='get seq', action='store_true') |
|
137
|
|
|
|
|
138
|
|
|
parser.add_argument('--color-seq', help='color seq, works with --get-seq', action='store_true') |
|
139
|
|
|
|
|
140
|
|
|
parser.add_argument('--ignore-files', help='files to be ingored, .e.g, \'solution\'') |
|
141
|
|
|
|
|
142
|
|
|
parser.add_argument('--compact', |
|
143
|
|
|
help=textwrap.dedent("""with --get-seq, get it in compact view' |
|
144
|
|
|
$ rna_pdb_tools.py --get-seq --compact *.pdb |
|
145
|
|
|
# 20_Bujnicki_1 |
|
146
|
|
|
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68 |
|
147
|
|
|
# 20_Bujnicki_2 |
|
148
|
|
|
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68 |
|
149
|
|
|
# 20_Bujnicki_3 |
|
150
|
|
|
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68 |
|
151
|
|
|
# 20_Bujnicki_4 |
|
152
|
|
|
|
|
153
|
|
|
"""), action='store_true') |
|
154
|
|
|
|
|
155
|
|
|
parser.add_argument('--hide-warnings', help='hide warnings, works with --get-chain, it hides warnings that given changes are not detected in a PDB file', action='store_true') |
|
156
|
|
|
|
|
157
|
|
|
parser.add_argument('--get-ss', help='get secondary structure', action='store_true') |
|
158
|
|
|
|
|
159
|
|
|
parser.add_argument('--rosetta2generic', help='convert ROSETTA-like format to a generic pdb', |
|
160
|
|
|
action='store_true') |
|
161
|
|
|
|
|
162
|
|
|
parser.add_argument('--no-hr', help='do not insert the header into files', |
|
163
|
|
|
action='store_true') |
|
164
|
|
|
|
|
165
|
|
|
parser.add_argument('--renumber-residues', help='by defult is false', |
|
166
|
|
|
action='store_true') |
|
167
|
|
|
|
|
168
|
|
|
parser.add_argument('--dont-rename-chains', |
|
169
|
|
|
help=textwrap.dedent("""used only with --get-rnapuzzle-ready. |
|
170
|
|
|
By default: |
|
171
|
|
|
--get-rnapuzzle-ready rename chains from ABC.. to stop behavior switch on this option |
|
172
|
|
|
"""), |
|
173
|
|
|
action='store_true') |
|
174
|
|
|
|
|
175
|
|
|
parser.add_argument('--dont-fix-missing-atoms', |
|
176
|
|
|
help="""used only with --get-rnapuzzle-ready""", |
|
177
|
|
|
action='store_true') |
|
178
|
|
|
|
|
179
|
|
|
parser.add_argument('--inspect', |
|
180
|
|
|
help="inspect missing atoms (technically decorator to --get-rnapuzzle-ready without actually doing anything but giving a report on problems)", |
|
181
|
|
|
action='store_true') |
|
182
|
|
|
|
|
183
|
|
|
parser.add_argument('--collapsed-view', help='', |
|
184
|
|
|
action='store_true') |
|
185
|
|
|
|
|
186
|
|
|
parser.add_argument('--cv', help='alias to collapsed_view', |
|
187
|
|
|
action='store_true') |
|
188
|
|
|
|
|
189
|
|
|
parser.add_argument('-v', '--verbose', help='tell me more what you\'re doing, please!', |
|
190
|
|
|
action='store_true') |
|
191
|
|
|
|
|
192
|
|
|
parser.add_argument('--mutate', help=textwrap.dedent("""mutate residues, |
|
193
|
|
|
e.g., |
|
194
|
|
|
--mutate "A:1a+2a+3a+4a,B:1a" |
|
195
|
|
|
to mutate to adenines the first four nucleotides of the chain A |
|
196
|
|
|
and the first nucleotide of the chain B""")) |
|
197
|
|
|
|
|
198
|
|
|
parser.add_argument('--edit', |
|
199
|
|
|
dest="edit", |
|
200
|
|
|
default='', |
|
201
|
|
|
help="edit 'A:6>B:200', 'A:2-7>B:2-7'") |
|
202
|
|
|
|
|
203
|
|
|
parser.add_argument('--rename-chain', |
|
204
|
|
|
help="edit 'A>B' to rename chain A to chain B") |
|
205
|
|
|
|
|
206
|
|
|
parser.add_argument('--swap-chains', help='B>A, rename A to _, then B to A, then _ to B') |
|
207
|
|
|
|
|
208
|
|
|
parser.add_argument('--set-chain', help='set chain for all ATOM lines and TER (quite brutal function)') |
|
209
|
|
|
|
|
210
|
|
|
parser.add_argument('--replace-chain', |
|
211
|
|
|
default='', |
|
212
|
|
|
help=textwrap.dedent("""a file PDB name with one chain that will be used to |
|
213
|
|
|
replace the chain in the original PDB file, |
|
214
|
|
|
the chain id in this file has to be the same with the chain id of the original chain""")) |
|
215
|
|
|
|
|
216
|
|
|
parser.add_argument('--delete', # type="string", |
|
217
|
|
|
dest="delete", |
|
218
|
|
|
default='', |
|
219
|
|
|
help="delete the selected fragment, e.g. A:10-16, or for more than one fragment --delete 'A:1-25+30-57'") |
|
220
|
|
|
|
|
221
|
|
|
parser.add_argument('--extract', # type="string", |
|
222
|
|
|
dest="extract", |
|
223
|
|
|
default='', |
|
224
|
|
|
help="extract the selected fragment, e.g. A:10-16, or for more than one fragment --extract 'A:1-25+30-57', or even 'A:1-25+B:30-57'") |
|
225
|
|
|
|
|
226
|
|
|
parser.add_argument('--extract-chain', |
|
227
|
|
|
help="extract chain, e.g. A") |
|
228
|
|
|
|
|
229
|
|
|
parser.add_argument('--uniq', help=textwrap.dedent(""" |
|
230
|
|
|
rna_pdb_tools.py --get-seq --uniq '[:5]' --compact --chain-first * | sort |
|
231
|
|
|
A:1-121 ACCUUGCGCAACUGGCGAAUCCUGGGGCUGCCGCCGGCAGUACCC...CA # rp13nc3295_min.out.1 |
|
232
|
|
|
A:1-123 ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # rp13cp0016_min.out.1 |
|
233
|
|
|
A:1-123 ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # zcp_6537608a_ALL-000001_AA |
|
234
|
|
|
A:1-45 57-71 GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCGCCGCCCGCCUGGGC # solution |
|
235
|
|
|
""")) |
|
236
|
|
|
|
|
237
|
|
|
parser.add_argument('--chain-first', help="", action='store_true') |
|
238
|
|
|
parser.add_argument('--oneline', help="", action='store_true') |
|
239
|
|
|
|
|
240
|
|
|
parser.add_argument('--replace-htm', help="", action='store_true') |
|
241
|
|
|
|
|
242
|
|
|
parser.add_argument('--fasta', |
|
243
|
|
|
help= textwrap.dedent("""with --get-seq, show sequences in fasta format, |
|
244
|
|
|
can be combined with --compact (mind, chains will be separated with ' ' in one line) |
|
245
|
|
|
|
|
246
|
|
|
$ rna_pdb_tools.py --get-seq --fasta --compact input/20_Bujnicki_1.pdb |
|
247
|
|
|
> 20_Bujnicki_1 |
|
248
|
|
|
ACCCGCAAGGCCGACGGC GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU |
|
249
|
|
|
|
|
250
|
|
|
"""), action='store_true') |
|
251
|
|
|
|
|
252
|
|
|
parser.add_argument('--cif2pdb', help="convert cif to PDB, fancy way", action='store_true') |
|
253
|
|
|
parser.add_argument('--pdb2cif', help="[PyMOL Python package required]", action='store_true') |
|
254
|
|
|
|
|
255
|
|
|
parser.add_argument('--mdr', help='get structures ready for MD (like rpr but without first)', |
|
256
|
|
|
action='store_true') |
|
257
|
|
|
|
|
258
|
|
|
x = parser.add_argument_group('RNAPUZZLE-READY') |
|
259
|
|
|
x.add_argument('--get-rnapuzzle-ready', |
|
260
|
|
|
help=textwrap.dedent("""get RNApuzzle ready (keep only standard atoms).' |
|
261
|
|
|
Be default it does not renumber residues, use --renumber-residues |
|
262
|
|
|
[requires BioPython]"""), action='store_true') |
|
263
|
|
|
|
|
264
|
|
|
x.add_argument('--rpr', help='alias to get_rnapuzzle ready)', |
|
265
|
|
|
action='store_true') |
|
266
|
|
|
|
|
267
|
|
|
|
|
268
|
|
|
rpr = parser.add_argument_group('CAN BE COMBINED WITH')# --get-rnapuzzle-ready (or --rpr) can be combined with') |
|
269
|
|
|
|
|
270
|
|
|
rpr.add_argument('--keep-hetatm', help='keep hetatoms', action='store_true') |
|
271
|
|
|
|
|
272
|
|
|
rpr.add_argument('--inplace', help=textwrap.dedent("""in place edit the file! [experimental, |
|
273
|
|
|
only for get_rnapuzzle_ready, --delete, --get-ss, --get-seq, --edit-pdb]"""), |
|
274
|
|
|
action='store_true') |
|
275
|
|
|
|
|
276
|
|
|
rpr.add_argument('--here', help=textwrap.dedent("""save a file next to the original file with auto suffix |
|
277
|
|
|
for --extract it's .extr.pdb"""), |
|
278
|
|
|
action='store_true') |
|
279
|
|
|
|
|
280
|
|
|
rpr.add_argument('--suffix', help=textwrap.dedent("""when used with --inplace allows you to change a name of a new file, --suffix del will give <file>_del.pdb (mind added _)""")) |
|
281
|
|
|
|
|
282
|
|
|
rpr.add_argument('--replace-hetatm', help="replace 'HETATM' with 'ATOM' [tested only with --get-rnapuzzle-ready]", |
|
283
|
|
|
action="store_true") |
|
284
|
|
|
|
|
285
|
|
|
rpr.add_argument('--dont-report-missing-atoms', |
|
286
|
|
|
help="""used only with --get-rnapuzzle-ready""", |
|
287
|
|
|
action='store_true') |
|
288
|
|
|
|
|
289
|
|
|
rpr.add_argument('--backbone-only', |
|
290
|
|
|
help="""used only with --get-rnapuzzle-ready, keep only backbone (= remove bases)""", |
|
291
|
|
|
action='store_true') |
|
292
|
|
|
|
|
293
|
|
|
rpr.add_argument('--p-only', |
|
294
|
|
|
help="""used only with --get-rnapuzzle-ready, keep p backbone (= remove bases)""", |
|
295
|
|
|
action='store_true') |
|
296
|
|
|
|
|
297
|
|
|
rpr.add_argument('--ref-frame-only', |
|
298
|
|
|
help="""used only with --get-rnapuzzle-ready, keep only reference frames, OP1 OP2 P""", |
|
299
|
|
|
action='store_true') |
|
300
|
|
|
|
|
301
|
|
|
rpr.add_argument('--no-backbone', |
|
302
|
|
|
help="""used only with --get-rnapuzzle-ready, remove atoms of backbone (define as P OP1 OP2 O5')""", |
|
303
|
|
|
action='store_true') |
|
304
|
|
|
|
|
305
|
|
|
rpr.add_argument('--bases-only', |
|
306
|
|
|
help="""used only with --get-rnapuzzle-ready, keep only atoms of bases""", |
|
307
|
|
|
action='store_true') |
|
308
|
|
|
|
|
309
|
|
|
rpr.add_argument('--save-single-res', |
|
310
|
|
|
help="""used only with --get-rnapuzzle-ready, for each residue create a new pdb output file, you can combine it with --bases-only etc.""", |
|
311
|
|
|
action='store_true') |
|
312
|
|
|
|
|
313
|
|
|
parser.add_argument('file', help='file', nargs='+') |
|
314
|
|
|
#parser.add_argument('outfile', help='outfile') |
|
315
|
|
|
return parser |
|
316
|
|
|
|
|
317
|
|
|
|
|
318
|
|
|
# main |
|
319
|
|
|
if __name__ == '__main__': |
|
320
|
|
|
# get version |
|
321
|
|
|
version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__) |
|
322
|
|
|
version = version[1].strip() |
|
323
|
|
|
|
|
324
|
|
|
# get parser and arguments |
|
325
|
|
|
parser = get_parser() |
|
326
|
|
|
|
|
327
|
|
|
args = parser.parse_args() |
|
328
|
|
|
|
|
329
|
|
|
# quick fix for one files vs file-s |
|
330
|
|
|
if list == type(args.file) and len(args.file) == 1: |
|
331
|
|
|
args.file = args.file[0] |
|
332
|
|
|
|
|
333
|
|
|
if args.report: |
|
334
|
|
|
s = RNAStructure(args.file) |
|
335
|
|
|
print(s.get_report) |
|
336
|
|
|
print(s.get_preview()) |
|
337
|
|
|
print(s.get_info_chains()) |
|
338
|
|
|
|
|
339
|
|
|
if args.clean: |
|
340
|
|
|
s = RNAStructure(args.file) |
|
341
|
|
|
s.decap_gtp() |
|
342
|
|
|
s.std_resn() |
|
343
|
|
|
s.remove_hydrogen() |
|
344
|
|
|
s.remove_ion() |
|
345
|
|
|
s.remove_water() |
|
346
|
|
|
s.renum_atoms() |
|
347
|
|
|
s.fix_O_in_UC() |
|
348
|
|
|
s.fix_op_atoms() |
|
349
|
|
|
# print s.get_preview() |
|
350
|
|
|
# s.write(args.outfile) |
|
351
|
|
|
if not args.no_hr: |
|
352
|
|
|
print(add_header(version)) |
|
353
|
|
|
print(s.get_text()) |
|
354
|
|
|
|
|
355
|
|
|
if args.get_seq: |
|
356
|
|
|
# quick fix - make a list on the spot |
|
357
|
|
|
if list != type(args.file): |
|
358
|
|
|
args.file = [args.file] |
|
359
|
|
|
################################## |
|
360
|
|
|
analyzed = [] |
|
361
|
|
|
for f in args.file: |
|
362
|
|
|
##################################### |
|
363
|
|
|
if args.uniq: |
|
364
|
|
|
subname = eval('f' + args.uniq) |
|
365
|
|
|
if subname in analyzed: |
|
366
|
|
|
continue |
|
367
|
|
|
else: |
|
368
|
|
|
analyzed.append(subname) |
|
369
|
|
|
######## |
|
370
|
|
|
s = RNAStructure(f) |
|
371
|
|
|
if not s.is_pdb(): |
|
372
|
|
|
print('Error: Not a PDB file %s' % f) |
|
373
|
|
|
sys.exit(1) |
|
374
|
|
|
s.decap_gtp() |
|
375
|
|
|
s.std_resn() |
|
376
|
|
|
s.remove_hydrogen() |
|
377
|
|
|
s.remove_ion() |
|
378
|
|
|
s.remove_water() |
|
379
|
|
|
if args.renum_atoms: |
|
380
|
|
|
s.renum_atoms() |
|
381
|
|
|
s.fix_O_in_UC() |
|
382
|
|
|
s.fix_op_atoms() |
|
383
|
|
|
|
|
384
|
|
|
output = '' |
|
385
|
|
|
|
|
386
|
|
|
# with # is easier to grep this out |
|
387
|
|
|
if args.fasta: |
|
388
|
|
|
# s.fn vs s.name |
|
389
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n' |
|
390
|
|
|
elif args.oneline: |
|
391
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n' |
|
392
|
|
|
else: |
|
393
|
|
|
output += '# ' + os.path.basename(f.replace('.pdb', '')) + '\n' |
|
394
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq) + '\n' |
|
395
|
|
|
|
|
396
|
|
|
try: |
|
397
|
|
|
sys.stdout.write(output) |
|
398
|
|
|
sys.stdout.flush() |
|
399
|
|
|
except IOError: |
|
400
|
|
|
pass |
|
401
|
|
|
|
|
402
|
|
|
if args.get_ss: |
|
403
|
|
|
# quick fix - make a list on the spot |
|
404
|
|
|
if list != type(args.file): |
|
405
|
|
|
args.file = [args.file] |
|
406
|
|
|
################################## |
|
407
|
|
|
for f in args.file: |
|
408
|
|
|
output = f + '\n' |
|
409
|
|
|
p = x3DNA(f) |
|
410
|
|
|
output += p.get_secstruc() + '\n' |
|
411
|
|
|
try: |
|
412
|
|
|
sys.stdout.write(output) |
|
413
|
|
|
sys.stdout.flush() |
|
414
|
|
|
except IOError: |
|
415
|
|
|
pass |
|
416
|
|
|
|
|
417
|
|
|
# getchain |
|
418
|
|
|
if args.get_chain: |
|
419
|
|
|
s = RNAStructure(args.file) |
|
420
|
|
|
## s.std_resn() |
|
421
|
|
|
## s.remove_hydrogen() |
|
422
|
|
|
## s.remove_ion() |
|
423
|
|
|
## s.remove_water() |
|
424
|
|
|
## s.renum_atoms() |
|
425
|
|
|
## s.fix_O_in_UC() |
|
426
|
|
|
## s.fix_op_atoms() |
|
427
|
|
|
# print s.get_preview() |
|
428
|
|
|
warningmsg = '' |
|
429
|
|
|
for chain in list(args.get_chain): |
|
430
|
|
|
chain_txt = s.get_chain(chain) |
|
431
|
|
|
if not chain_txt.strip(): |
|
432
|
|
|
warningmsg += 'Warning: Chain %s not detected!' % chain |
|
433
|
|
|
else: |
|
434
|
|
|
print(chain_txt) |
|
435
|
|
|
if not args.hide_warnings: |
|
436
|
|
|
if warningmsg: |
|
437
|
|
|
print(warningmsg) |
|
438
|
|
|
|
|
439
|
|
|
if args.rosetta2generic: |
|
440
|
|
|
s = RNAStructure(args.file) |
|
441
|
|
|
s.std_resn() |
|
442
|
|
|
s.remove_hydrogen() |
|
443
|
|
|
s.remove_ion() |
|
444
|
|
|
s.remove_water() |
|
445
|
|
|
s.fix_op_atoms() |
|
446
|
|
|
s.renum_atoms() |
|
447
|
|
|
# print s.get_preview() |
|
448
|
|
|
# s.write(args.outfile) |
|
449
|
|
|
if not args.no_hr: |
|
450
|
|
|
print(add_header(version)) |
|
451
|
|
|
print(s.get_text()) |
|
452
|
|
|
|
|
453
|
|
|
remarks_only = False |
|
454
|
|
|
if args.inspect: |
|
455
|
|
|
args.get_rnapuzzle_ready = True |
|
456
|
|
|
fix_missing_atom = False |
|
457
|
|
|
remarks_only = True |
|
458
|
|
|
args.dont_rename_chains = True |
|
459
|
|
|
args.renumber_residues = False |
|
460
|
|
|
|
|
461
|
|
|
if args.get_rnapuzzle_ready or args.rpr or args.mdr: # rnapuzzle |
|
462
|
|
|
# quick fix - make a list on the spot |
|
463
|
|
|
if list != type(args.file): |
|
464
|
|
|
args.file = [args.file] |
|
465
|
|
|
################################## |
|
466
|
|
|
# progress bar only in --inplace mode! |
|
467
|
|
|
if args.inplace: |
|
468
|
|
|
if not args.no_progress_bar: |
|
469
|
|
|
import progressbar |
|
470
|
|
|
bar = progressbar.ProgressBar(max_value=len(args.file)) |
|
471
|
|
|
bar.update(0) |
|
472
|
|
|
|
|
473
|
|
|
for c, f in enumerate(args.file): |
|
474
|
|
|
if args.verbose: |
|
475
|
|
|
print(f) |
|
476
|
|
|
if args.inplace: |
|
477
|
|
|
shutil.copy(f, f + '~') |
|
478
|
|
|
|
|
479
|
|
|
# keep previous edits |
|
480
|
|
|
previous_edits = [] |
|
481
|
|
|
with open(f) as fx: |
|
482
|
|
|
for l in fx: |
|
483
|
|
|
if l.startswith('HEADER --'): |
|
484
|
|
|
previous_edits.append(l.strip()) |
|
485
|
|
|
###################### |
|
486
|
|
|
|
|
487
|
|
|
s = RNAStructure(f) |
|
488
|
|
|
if args.replace_hetatm: |
|
489
|
|
|
s.replace_hetatms() |
|
490
|
|
|
|
|
491
|
|
|
s.remove_hydrogen() |
|
492
|
|
|
s.decap_gtp() |
|
493
|
|
|
s.std_resn() |
|
494
|
|
|
s.fix_op_atoms() |
|
495
|
|
|
|
|
496
|
|
|
s.remove_ion() |
|
497
|
|
|
s.remove_water() |
|
498
|
|
|
# s.renum_atoms() |
|
499
|
|
|
|
|
500
|
|
|
s.shift_atom_names() |
|
501
|
|
|
s.prune_elements() |
|
502
|
|
|
|
|
503
|
|
|
# s.write('tmp.pdb') |
|
504
|
|
|
|
|
505
|
|
|
rename_chains = False if args.dont_rename_chains else True |
|
506
|
|
|
|
|
507
|
|
|
report_missing_atoms = not args.dont_report_missing_atoms |
|
508
|
|
|
fix_missing_atom = not args.dont_fix_missing_atoms |
|
509
|
|
|
|
|
510
|
|
|
ignore_op3 = False |
|
511
|
|
|
if args.mdr: |
|
512
|
|
|
ignore_op3 = True |
|
513
|
|
|
|
|
514
|
|
|
remarks = s.get_rnapuzzle_ready(args.renumber_residues, fix_missing_atoms=fix_missing_atom, |
|
515
|
|
|
rename_chains=rename_chains, |
|
516
|
|
|
report_missing_atoms=report_missing_atoms, |
|
517
|
|
|
backbone_only=args.backbone_only, |
|
518
|
|
|
no_backbone=args.no_backbone, |
|
519
|
|
|
bases_only=args.bases_only, |
|
520
|
|
|
p_only=args.p_only, |
|
521
|
|
|
keep_hetatm=args.keep_hetatm, |
|
522
|
|
|
ignore_op3=ignore_op3, |
|
523
|
|
|
save_single_res=args.save_single_res, |
|
524
|
|
|
ref_frame_only = args.ref_frame_only, |
|
525
|
|
|
verbose=args.verbose) |
|
526
|
|
|
|
|
527
|
|
|
if args.inplace: |
|
528
|
|
|
if args.suffix: |
|
529
|
|
|
f = f.replace('.pdb', '_' + args.suffix + '.pdb') |
|
530
|
|
|
if args.verbose: print(f) |
|
531
|
|
|
|
|
532
|
|
|
with open(f, 'w') as f: |
|
533
|
|
|
if not args.no_hr: |
|
534
|
|
|
f.write(add_header(version) + '\n') |
|
535
|
|
|
if previous_edits: |
|
536
|
|
|
f.write('\n'.join(previous_edits) + '\n') |
|
537
|
|
|
if remarks: |
|
538
|
|
|
f.write('\n'.join(remarks) + '\n') |
|
539
|
|
|
f.write(s.get_text()) |
|
540
|
|
|
# progress bar only in --inplace mode! |
|
541
|
|
|
if not args.no_progress_bar: |
|
542
|
|
|
bar.update(c) |
|
|
|
|
|
|
543
|
|
|
|
|
544
|
|
|
else: |
|
545
|
|
|
output = '' |
|
546
|
|
|
if not args.no_hr: |
|
547
|
|
|
output += add_header(version) + '\n' |
|
548
|
|
|
if remarks: |
|
549
|
|
|
output += '\n'.join(remarks) + '\n' |
|
550
|
|
|
output += s.get_text() + '\n' |
|
551
|
|
|
|
|
552
|
|
|
if remarks_only: |
|
553
|
|
|
sys.stdout.write('\n'.join(remarks)) |
|
554
|
|
|
sys.stdout.flush() |
|
555
|
|
|
else: |
|
556
|
|
|
if args.here: |
|
557
|
|
|
if '_rpr' not in f: # good idea? |
|
558
|
|
|
nf = f.replace('.pdb', '_rpr.pdb') |
|
559
|
|
|
with open(nf, 'w') as fio: |
|
560
|
|
|
print(nf) |
|
561
|
|
|
fio.write(output) |
|
562
|
|
|
else: |
|
563
|
|
|
try: |
|
564
|
|
|
sys.stdout.write(output) |
|
565
|
|
|
sys.stdout.flush() |
|
566
|
|
|
except IOError: |
|
567
|
|
|
pass |
|
568
|
|
|
# hmm... fix for problem with renumbering, i do renumbering |
|
569
|
|
|
# and i stop here |
|
570
|
|
|
# i'm not sure that this is perfect |
|
571
|
|
|
sys.exit(0) |
|
572
|
|
|
|
|
573
|
|
|
|
|
574
|
|
|
if args.renumber_residues: |
|
575
|
|
|
s = RNAStructure(args.file) |
|
576
|
|
|
s.remove_hydrogen() |
|
577
|
|
|
s.remove_ion() |
|
578
|
|
|
s.remove_water() |
|
579
|
|
|
s.get_rnapuzzle_ready(args.renumber_residues) |
|
580
|
|
|
s.renum_atoms() |
|
581
|
|
|
if not args.no_hr: |
|
582
|
|
|
print(add_header(version)) |
|
583
|
|
|
print(s.get_text()) |
|
584
|
|
|
|
|
585
|
|
|
# args.renumber_resides_dirty |
|
586
|
|
|
if args.renum_residues_dirty: |
|
587
|
|
|
# quick fix - make a list on the spot |
|
588
|
|
|
if list != type(args.file): |
|
589
|
|
|
args.file = [args.file] |
|
590
|
|
|
################################## |
|
591
|
|
|
for f in args.file: |
|
592
|
|
|
if args.inplace: |
|
593
|
|
|
shutil.copy(f, f + '~') |
|
594
|
|
|
|
|
595
|
|
|
s = RNAStructure(f) |
|
596
|
|
|
|
|
597
|
|
|
output = '' |
|
598
|
|
|
#if not args.no_hr: |
|
599
|
|
|
# output += add_header(version) + '\n' |
|
600
|
|
|
# output += 'HEADER --delete ' + args.delete + '\n' # ' '.join(str(selection)) |
|
601
|
|
|
c = 1 |
|
602
|
|
|
old_resi = -1 |
|
603
|
|
|
for l in s.lines: |
|
604
|
|
|
if l.startswith('ATOM') or l.startswith("HETATOM"): |
|
605
|
|
|
resi = int(l[23:26].strip()) |
|
606
|
|
|
if resi != old_resi: |
|
607
|
|
|
old_resi = resi |
|
608
|
|
|
c += 1 |
|
609
|
|
|
# print(resi, c) |
|
610
|
|
|
#resi = c |
|
611
|
|
|
#if chain in selection: |
|
612
|
|
|
# if resi in selection[chain]: |
|
613
|
|
|
# continue # print chain, resi |
|
614
|
|
|
output += l[:23] + str(c).rjust(3) + l[26:] + '\n' |
|
615
|
|
|
# write: inplace |
|
616
|
|
|
if args.inplace: |
|
617
|
|
|
with open(f, 'w') as f: |
|
618
|
|
|
f.write(output) |
|
619
|
|
|
else: # write: to stdout |
|
620
|
|
|
try: |
|
621
|
|
|
sys.stdout.write(output) |
|
622
|
|
|
sys.stdout.flush() |
|
623
|
|
|
except IOError: |
|
624
|
|
|
pass |
|
625
|
|
|
|
|
626
|
|
|
|
|
627
|
|
|
if args.undo: |
|
628
|
|
|
# quick fix - make a list on the spot |
|
629
|
|
|
dir = args.file #os.path.abso(os.path.dirname(args.file)) |
|
630
|
|
|
for f in glob.glob(dir + '/*.pdb~'): |
|
631
|
|
|
if args.verbose: |
|
632
|
|
|
print(f, '->',f.replace('.pdb~', '.pdb')) |
|
633
|
|
|
os.rename(f, f.replace('.pdb~', '.pdb')) |
|
634
|
|
|
|
|
635
|
|
|
if args.delete: |
|
636
|
|
|
# quick fix - make a list on the spot |
|
637
|
|
|
if list != type(args.file): |
|
638
|
|
|
args.file = [args.file] |
|
639
|
|
|
################################## |
|
640
|
|
|
for f in args.file: |
|
641
|
|
|
if args.inplace: |
|
642
|
|
|
shutil.copy(f, f + '~') |
|
643
|
|
|
|
|
644
|
|
|
selection = select_pdb_fragment(args.delete) |
|
645
|
|
|
s = RNAStructure(f) |
|
646
|
|
|
|
|
647
|
|
|
output = '' |
|
648
|
|
|
if not args.no_hr: |
|
649
|
|
|
output += add_header(version) + '\n' |
|
650
|
|
|
output += 'HEADER --delete ' + args.delete + '\n' # ' '.join(str(selection)) |
|
651
|
|
|
for l in s.lines: |
|
652
|
|
|
if l.startswith('ATOM'): |
|
653
|
|
|
chain = l[21] |
|
654
|
|
|
resi = int(l[23:26].strip()) |
|
655
|
|
|
if chain in selection: |
|
656
|
|
|
if resi in selection[chain]: |
|
657
|
|
|
continue # print chain, resi |
|
658
|
|
|
output += l + '\n' |
|
659
|
|
|
|
|
660
|
|
|
# write: inplace |
|
661
|
|
|
if args.inplace: |
|
662
|
|
|
os.rename(f, f.replace('.pdb', '.pdb~')) |
|
663
|
|
|
if args.suffix: |
|
664
|
|
|
f = f.replace('.pdb', '_' + args.suffix + '.pdb') |
|
665
|
|
|
with open(f, 'w') as f: |
|
666
|
|
|
f.write(output) |
|
667
|
|
|
else: # write: to stdout |
|
668
|
|
|
try: |
|
669
|
|
|
sys.stdout.write(output) |
|
670
|
|
|
sys.stdout.flush() |
|
671
|
|
|
except IOError: |
|
672
|
|
|
pass |
|
673
|
|
|
|
|
674
|
|
|
if args.replace_chain: |
|
675
|
|
|
# quick fix - make a list on the spot |
|
676
|
|
|
if list != type(args.file): |
|
677
|
|
|
args.file = [args.file] |
|
678
|
|
|
################################## |
|
679
|
|
|
for f in args.file: |
|
680
|
|
|
if args.inplace: |
|
681
|
|
|
shutil.copy(f, f + '~') |
|
682
|
|
|
|
|
683
|
|
|
# --replace_chain <file> without A:<file> it will be easier than --x "A:<file>" |
|
684
|
|
|
s = RNAStructure(args.replace_chain) |
|
685
|
|
|
chain_ids = (s.get_all_chain_ids()) |
|
686
|
|
|
if len(chain_ids) > 1: |
|
687
|
|
|
raise Exception('There is more than one chain in the inserted PDB file. There should be only one chain, the one you want to insert to the PDB.') |
|
688
|
|
|
out = replace_chain(f, args.replace_chain, list(chain_ids)[0]) |
|
689
|
|
|
print(out) |
|
690
|
|
|
|
|
691
|
|
|
if args.mutate: |
|
692
|
|
|
# quick fix - make a list on the spot |
|
693
|
|
|
if list != type(args.file): |
|
694
|
|
|
args.file = [args.file] |
|
695
|
|
|
################################## |
|
696
|
|
|
from rna_tools.tools.mini_moderna3.moderna import * |
|
697
|
|
|
|
|
698
|
|
|
for f in args.file: |
|
699
|
|
|
if args.ignore_files: |
|
700
|
|
|
if args.ignore_files in f: |
|
701
|
|
|
continue |
|
702
|
|
|
|
|
703
|
|
|
if args.inplace: |
|
704
|
|
|
shutil.copy(f, f + '~') # create a backup copy if inplace |
|
705
|
|
|
|
|
706
|
|
|
# create a working copy of the main file |
|
707
|
|
|
ftf = tempfile.NamedTemporaryFile(delete=False).name # f temp file |
|
708
|
|
|
shutil.copy(f, ftf) # create a backup copy if inplace |
|
709
|
|
|
|
|
710
|
|
|
# go over each chain |
|
711
|
|
|
# rna_pdb_tools.py --mutate 'A:1CB:1G,A:1U+B:1A' CG_AB.pdb > ~/Desktop/a.pdb |
|
712
|
|
|
args.mutate = args.mutate.upper() |
|
713
|
|
|
for m in args.mutate.split(','): # A:1A, B:1A |
|
714
|
|
|
chain, resi_mutate_to = m.strip().split(':') # A:1A+2C |
|
715
|
|
|
resi_mutate_to_list = resi_mutate_to.split('+') # A:1A+2C |
|
716
|
|
|
|
|
717
|
|
|
model = load_model(f, chain) |
|
|
|
|
|
|
718
|
|
|
# go over each mutation in this chain |
|
719
|
|
|
for resi_mutate_to in resi_mutate_to_list: |
|
720
|
|
|
resi = resi_mutate_to[:-1] |
|
721
|
|
|
mutate_to = resi_mutate_to[-1] |
|
722
|
|
|
if args.verbose: |
|
723
|
|
|
print('Mutate', model[resi], 'to', mutate_to) |
|
724
|
|
|
exchange_single_base(model[resi], mutate_to) |
|
|
|
|
|
|
725
|
|
|
|
|
726
|
|
|
# multi mutation in one chain |
|
727
|
|
|
tf = tempfile.NamedTemporaryFile(delete=False) |
|
728
|
|
|
model.write_pdb_file(tf.name) |
|
729
|
|
|
|
|
730
|
|
|
# work on the copy of f, ftf |
|
731
|
|
|
output = replace_chain(ftf, tf.name, chain) |
|
732
|
|
|
with open(ftf, 'w') as tmp: |
|
733
|
|
|
tmp.write(output) |
|
734
|
|
|
|
|
735
|
|
|
# write: inplace |
|
736
|
|
|
if args.inplace: |
|
737
|
|
|
# ftf now is f, get ready for the final output |
|
738
|
|
|
if args.suffix: |
|
739
|
|
|
f = f.replace('.pdb', '_' + args.suffix + '.pdb') |
|
740
|
|
|
# rpr on the file |
|
741
|
|
|
shutil.copy(ftf, f) |
|
742
|
|
|
os.system('rna_pdb_tools.py --rpr --no-progress-bar --inplace ' + f) |
|
743
|
|
|
else: # write: to stdout |
|
744
|
|
|
try: |
|
745
|
|
|
sys.stdout.write(output) |
|
|
|
|
|
|
746
|
|
|
sys.stdout.flush() |
|
747
|
|
|
except IOError: |
|
748
|
|
|
pass |
|
749
|
|
|
|
|
750
|
|
|
|
|
751
|
|
|
# extract |
|
752
|
|
|
if args.extract: |
|
753
|
|
|
# quick fix - make a list on the spot |
|
754
|
|
|
if list != type(args.file): |
|
755
|
|
|
args.file = [args.file] |
|
756
|
|
|
################################## |
|
757
|
|
|
for f in args.file: |
|
758
|
|
|
if args.inplace: |
|
759
|
|
|
shutil.copy(f, f + '~') |
|
760
|
|
|
|
|
761
|
|
|
selection = select_pdb_fragment(args.extract) |
|
762
|
|
|
s = RNAStructure(f) |
|
763
|
|
|
|
|
764
|
|
|
output = '' |
|
765
|
|
|
if not args.no_hr: |
|
766
|
|
|
output += add_header(version) + '\n' |
|
767
|
|
|
output += 'HEADER extract ' + args.extract + '\n' # ' '.join(str(selection)) |
|
768
|
|
|
for l in s.lines: |
|
769
|
|
|
if l.startswith('ATOM'): |
|
770
|
|
|
chain = l[21] |
|
771
|
|
|
resi = int(l[23:26].strip()) |
|
772
|
|
|
if chain in selection: |
|
773
|
|
|
if resi in selection[chain]: |
|
774
|
|
|
# continue # print chain, resi |
|
775
|
|
|
output += l + '\n' |
|
776
|
|
|
|
|
777
|
|
|
# write: inplace |
|
778
|
|
|
if args.inplace: |
|
779
|
|
|
with open(f, 'w') as f: |
|
780
|
|
|
f.write(output) |
|
781
|
|
|
elif args.here: |
|
782
|
|
|
if '_extr' not in f: # good idea? |
|
783
|
|
|
nf = f.replace('.pdb', '_extr.pdb') |
|
784
|
|
|
with open(nf, 'w') as fio: |
|
785
|
|
|
print(nf) |
|
786
|
|
|
fio.write(output) |
|
787
|
|
|
else: # write: to stdout |
|
788
|
|
|
try: |
|
789
|
|
|
sys.stdout.write(output) |
|
790
|
|
|
sys.stdout.flush() |
|
791
|
|
|
except IOError: |
|
792
|
|
|
pass |
|
793
|
|
|
|
|
794
|
|
|
|
|
795
|
|
|
if args.extract_chain: |
|
796
|
|
|
# quick fix - make a list on the spot |
|
797
|
|
|
if list != type(args.file): |
|
798
|
|
|
args.file = [args.file] |
|
799
|
|
|
################################## |
|
800
|
|
|
for f in args.file: |
|
801
|
|
|
if args.inplace: |
|
802
|
|
|
shutil.copy(f, f + '~') |
|
803
|
|
|
|
|
804
|
|
|
selection = select_pdb_fragment(args.extract) |
|
805
|
|
|
s = RNAStructure(f) |
|
806
|
|
|
|
|
807
|
|
|
output = '' |
|
808
|
|
|
if not args.no_hr: |
|
809
|
|
|
output += add_header(version) + '\n' |
|
810
|
|
|
output += 'HEADER extract ' + args.extract + '\n' # ' '.join(str(selection)) |
|
811
|
|
|
for l in s.lines: |
|
812
|
|
|
if l.startswith('ATOM') or l.startswith('TER') or l.startswith('HETATM'): |
|
813
|
|
|
chain = l[21] |
|
814
|
|
|
if chain in args.extract_chain: |
|
815
|
|
|
output += l + '\n' |
|
816
|
|
|
|
|
817
|
|
|
# write: inplace |
|
818
|
|
|
if args.inplace: |
|
819
|
|
|
with open(f, 'w') as f: |
|
820
|
|
|
f.write(output) |
|
821
|
|
|
else: # write: to stdout |
|
822
|
|
|
try: |
|
823
|
|
|
sys.stdout.write(output) |
|
824
|
|
|
sys.stdout.flush() |
|
825
|
|
|
except IOError: |
|
826
|
|
|
pass |
|
827
|
|
|
|
|
828
|
|
|
if args.is_pdb: |
|
829
|
|
|
s = RNAStructure(args.file) |
|
830
|
|
|
output = str(s.is_pdb()) |
|
831
|
|
|
sys.stdout.write(output + '\n') |
|
832
|
|
|
|
|
833
|
|
|
if args.un_nmr: |
|
834
|
|
|
s = RNAStructure(args.file) |
|
835
|
|
|
str(s.un_nmr()) |
|
836
|
|
|
|
|
837
|
|
|
if args.is_nmr: |
|
838
|
|
|
struc = RNAStructure(args.file) |
|
839
|
|
|
output = str(struc.is_nmr(args.verbose)) |
|
840
|
|
|
sys.stdout.write(output + '\n') |
|
841
|
|
|
|
|
842
|
|
|
#edit |
|
843
|
|
|
if args.edit: |
|
844
|
|
|
if list != type(args.file): |
|
845
|
|
|
args.file = [args.file] |
|
846
|
|
|
################################## |
|
847
|
|
|
for f in args.file: |
|
848
|
|
|
if args.verbose: |
|
849
|
|
|
print(f) |
|
850
|
|
|
if args.inplace: |
|
851
|
|
|
shutil.copy(f, f + '~') |
|
852
|
|
|
|
|
853
|
|
|
output = edit_pdb(f, args) |
|
854
|
|
|
|
|
855
|
|
|
if args.inplace: |
|
856
|
|
|
with open(f, 'w') as f: |
|
857
|
|
|
f.write(output) |
|
858
|
|
|
else: # write: to stdout |
|
859
|
|
|
try: |
|
860
|
|
|
sys.stdout.write(output) |
|
861
|
|
|
sys.stdout.flush() |
|
862
|
|
|
except IOError: |
|
863
|
|
|
pass |
|
864
|
|
|
|
|
865
|
|
|
|
|
866
|
|
|
if args.fetch: |
|
867
|
|
|
fn = fetch(args.file) |
|
868
|
|
|
|
|
869
|
|
|
if args.fetch_fasta: |
|
870
|
|
|
pdb_id = args.file |
|
871
|
|
|
pdb_id = pdb_id.replace('.pdb', '') |
|
872
|
|
|
|
|
873
|
|
|
import urllib3 |
|
874
|
|
|
http = urllib3.PoolManager() |
|
875
|
|
|
|
|
876
|
|
|
response = http.request('GET', 'https://www.rcsb.org/fasta/entry/' + pdb_id.upper()) |
|
877
|
|
|
if not response.status == 200: |
|
878
|
|
|
raise PDBFetchError() |
|
|
|
|
|
|
879
|
|
|
else: |
|
880
|
|
|
txt = response.data.decode("utf-8").strip() |
|
881
|
|
|
print(txt) |
|
882
|
|
|
if 'No valid PDB IDs were submitted' in txt: |
|
883
|
|
|
pass |
|
884
|
|
|
else: |
|
885
|
|
|
with open(pdb_id + '.fa', 'w') as f: |
|
886
|
|
|
f.write(txt) |
|
887
|
|
|
|
|
888
|
|
|
|
|
889
|
|
|
if args.fetch_chain: |
|
890
|
|
|
fn = fetch(args.file[0]) |
|
891
|
|
|
chain = args.file[1] |
|
892
|
|
|
nfn = fn.replace('.pdb','') + '_' + chain + '.pdb' # 6bk8_H.pdb |
|
893
|
|
|
#cmd = 'rna_pdb_tools.py --get-chain %s %s' > %s' % (chain, fn, nfn) |
|
894
|
|
|
cmd = 'rna_pdb_tools.py --get-chain %s %s' % (chain, fn) |
|
895
|
|
|
os.system(cmd) |
|
896
|
|
|
# print(nfn) |
|
897
|
|
|
|
|
898
|
|
|
if args.fetch_ba: |
|
899
|
|
|
fetch_ba(args.file) |
|
900
|
|
|
|
|
901
|
|
|
if args.fetch_cif: |
|
902
|
|
|
if list != type(args.file): |
|
903
|
|
|
args.file = [args.file] |
|
904
|
|
|
################################## |
|
905
|
|
|
for f in args.file: |
|
906
|
|
|
fetch_cif(f) |
|
907
|
|
|
|
|
908
|
|
|
if args.collapsed_view or args.cv: |
|
909
|
|
|
collapsed_view(args) |
|
910
|
|
|
|
|
911
|
|
|
if args.delete_anisou: |
|
912
|
|
|
# quick fix - make a list on the spot |
|
913
|
|
|
if list != type(args.file): |
|
914
|
|
|
args.file = [args.file] |
|
915
|
|
|
################################## |
|
916
|
|
|
for f in args.file: |
|
917
|
|
|
if args.inplace: |
|
918
|
|
|
shutil.copy(f, f + '~') |
|
919
|
|
|
|
|
920
|
|
|
s = RNAStructure(f) |
|
921
|
|
|
|
|
922
|
|
|
output = '' |
|
923
|
|
|
if not args.no_hr: |
|
924
|
|
|
output += add_header(version) + '\n' |
|
925
|
|
|
|
|
926
|
|
|
for l in s.lines: |
|
927
|
|
|
if l.startswith('ANISOU'): |
|
928
|
|
|
continue |
|
929
|
|
|
else: |
|
930
|
|
|
output += l + '\n' |
|
931
|
|
|
|
|
932
|
|
|
if args.inplace: |
|
933
|
|
|
with open(f, 'w') as f: |
|
934
|
|
|
f.write(output) |
|
935
|
|
|
else: # write: to stdout |
|
936
|
|
|
try: |
|
937
|
|
|
sys.stdout.write(output) |
|
938
|
|
|
sys.stdout.flush() |
|
939
|
|
|
except IOError: |
|
940
|
|
|
pass |
|
941
|
|
|
|
|
942
|
|
|
|
|
943
|
|
|
if args.swap_chains: |
|
944
|
|
|
# quick fix - make a list on the spot |
|
945
|
|
|
if list != type(args.file): |
|
946
|
|
|
args.file = [args.file] |
|
947
|
|
|
################################## |
|
948
|
|
|
for f in args.file: |
|
949
|
|
|
if args.inplace: |
|
950
|
|
|
shutil.copy(f, f + '~') |
|
951
|
|
|
# rename_chain 'A>B' |
|
952
|
|
|
s = RNAStructure(f) |
|
953
|
|
|
output = '' |
|
954
|
|
|
if not args.no_hr: |
|
955
|
|
|
output += add_header(version) + '\n' |
|
956
|
|
|
|
|
957
|
|
|
chain_id_old, chain_id_new = args.swap_chains.split('>') |
|
958
|
|
|
if chain_id_old == chain_id_new: |
|
959
|
|
|
output = open(f).read() # return the file ;-) itself unchanged |
|
960
|
|
|
else: |
|
961
|
|
|
s.rename_chain(chain_id_new, '_') |
|
962
|
|
|
s.rename_chain(chain_id_old, chain_id_new) |
|
963
|
|
|
output += s.rename_chain('_', chain_id_old) |
|
964
|
|
|
|
|
965
|
|
|
if args.inplace: |
|
966
|
|
|
with open(f, 'w') as f: |
|
967
|
|
|
f.write(output) |
|
968
|
|
|
else: # write: to stdout |
|
969
|
|
|
try: |
|
970
|
|
|
sys.stdout.write(output) |
|
971
|
|
|
sys.stdout.flush() |
|
972
|
|
|
except IOError: |
|
973
|
|
|
pass |
|
974
|
|
|
|
|
975
|
|
|
if args.rename_chain: |
|
976
|
|
|
# quick fix - make a list on the spot |
|
977
|
|
|
if list != type(args.file): |
|
978
|
|
|
args.file = [args.file] |
|
979
|
|
|
################################## |
|
980
|
|
|
for f in args.file: |
|
981
|
|
|
if args.inplace: |
|
982
|
|
|
shutil.copy(f, f + '~') |
|
983
|
|
|
# rename_chain 'A>B' |
|
984
|
|
|
s = RNAStructure(f) |
|
985
|
|
|
chain_id_old, chain_id_new = args.rename_chain.split('>') |
|
986
|
|
|
output = '' |
|
987
|
|
|
if not args.no_hr: |
|
988
|
|
|
output += add_header(version) + '\n' |
|
989
|
|
|
output += s.rename_chain(chain_id_old, chain_id_new) |
|
990
|
|
|
if args.inplace: |
|
991
|
|
|
with open(f, 'w') as f: |
|
992
|
|
|
f.write(output) |
|
993
|
|
|
else: # write: to stdout |
|
994
|
|
|
try: |
|
995
|
|
|
sys.stdout.write(output) |
|
996
|
|
|
sys.stdout.flush() |
|
997
|
|
|
except IOError: |
|
998
|
|
|
pass |
|
999
|
|
|
|
|
1000
|
|
|
|
|
1001
|
|
|
if args.split_alt_locations: |
|
1002
|
|
|
# quick fix - make a list on the spot |
|
1003
|
|
|
if list != type(args.file): |
|
1004
|
|
|
args.file = [args.file] |
|
1005
|
|
|
################################## |
|
1006
|
|
|
for f in args.file: |
|
1007
|
|
|
if args.inplace: |
|
1008
|
|
|
shutil.copy(f, f + '~') |
|
1009
|
|
|
|
|
1010
|
|
|
#s = RNAStructure(f) |
|
1011
|
|
|
s = open(f) |
|
1012
|
|
|
output = '' |
|
1013
|
|
|
if not args.no_hr: |
|
1014
|
|
|
output += add_header(version) + '\n' |
|
1015
|
|
|
|
|
1016
|
|
|
# First, collect all alternative locations. |
|
1017
|
|
|
alts = set() |
|
1018
|
|
|
for l in s: |
|
1019
|
|
|
if l.startswith('ATOM'): |
|
1020
|
|
|
if l[16].strip(): |
|
1021
|
|
|
alts.add(l[16]) |
|
1022
|
|
|
s.close() |
|
1023
|
|
|
|
|
1024
|
|
|
if args.verbose: |
|
1025
|
|
|
print('alts:', alts) |
|
1026
|
|
|
|
|
1027
|
|
|
for index, alt in enumerate(alts): |
|
1028
|
|
|
output += 'MODEL %i' % (index + 1) |
|
1029
|
|
|
s = open(f) |
|
1030
|
|
|
for l in s: |
|
1031
|
|
|
if l.startswith('ATOM') or l.startswith('HETATM'): |
|
1032
|
|
|
# if empty, then print this line |
|
1033
|
|
|
if l[16] == ' ': |
|
1034
|
|
|
output += l |
|
1035
|
|
|
if l[16] == alt: |
|
1036
|
|
|
output += l |
|
1037
|
|
|
else: |
|
1038
|
|
|
output += l |
|
1039
|
|
|
output += 'ENDMDL\n' |
|
1040
|
|
|
s.close() |
|
1041
|
|
|
|
|
1042
|
|
|
if args.inplace: |
|
1043
|
|
|
with open(f, 'w') as f: |
|
1044
|
|
|
f.write(output) |
|
1045
|
|
|
else: # write: to stdout |
|
1046
|
|
|
try: |
|
1047
|
|
|
sys.stdout.write(output) |
|
1048
|
|
|
sys.stdout.flush() |
|
1049
|
|
|
except IOError: |
|
1050
|
|
|
pass |
|
1051
|
|
|
|
|
1052
|
|
|
if args.orgmode: |
|
1053
|
|
|
if args.inplace: |
|
1054
|
|
|
shutil.copy(args.file, args.file + '~') |
|
1055
|
|
|
s = RNAStructure(args.file) |
|
1056
|
|
|
s.decap_gtp() |
|
1057
|
|
|
s.std_resn() |
|
1058
|
|
|
s.remove_hydrogen() |
|
1059
|
|
|
s.remove_ion() |
|
1060
|
|
|
s.remove_water() |
|
1061
|
|
|
s.fix_op_atoms() |
|
1062
|
|
|
s.renum_atoms() |
|
1063
|
|
|
s.shift_atom_names() |
|
1064
|
|
|
s.prune_elements() |
|
1065
|
|
|
# print s.get_preview() |
|
1066
|
|
|
# s.write(args.outfile) |
|
1067
|
|
|
# for l in s.lines: |
|
1068
|
|
|
# print l |
|
1069
|
|
|
|
|
1070
|
|
|
remarks = s.get_rnapuzzle_ready( |
|
1071
|
|
|
args.renumber_residues, fix_missing_atoms=True, rename_chains=True, verbose=args.verbose) |
|
1072
|
|
|
|
|
1073
|
|
|
with open(args.file + '~', 'w') as f: |
|
1074
|
|
|
if not args.no_hr: |
|
1075
|
|
|
f.write(add_header(version) + '\n') |
|
1076
|
|
|
|
|
1077
|
|
|
f.write('\n'.join(remarks) + '\n') |
|
1078
|
|
|
f.write(s.get_text()) |
|
1079
|
|
|
|
|
1080
|
|
|
try: |
|
1081
|
|
|
from Bio import PDB |
|
1082
|
|
|
from Bio.PDB import PDBIO |
|
1083
|
|
|
import warnings |
|
1084
|
|
|
warnings.filterwarnings('ignore', '.*Invalid or missing.*',) |
|
1085
|
|
|
warnings.filterwarnings('ignore', '.*with given element *',) |
|
1086
|
|
|
except: |
|
1087
|
|
|
sys.exit('Error: Install biopython to use this function (pip biopython)') |
|
1088
|
|
|
|
|
1089
|
|
|
parser = PDB.PDBParser() |
|
1090
|
|
|
struct = parser.get_structure('', args.file + '~') |
|
1091
|
|
|
model = struct[0] |
|
1092
|
|
|
|
|
1093
|
|
|
# chains [['A', 'seq', [residues]]] |
|
1094
|
|
|
chains = [] |
|
1095
|
|
|
for c in model.get_list(): |
|
1096
|
|
|
seq = '' |
|
1097
|
|
|
chain = [] |
|
1098
|
|
|
for r in c: |
|
1099
|
|
|
chain.append(str(r.get_resname().strip()) + str(r.id[1])) |
|
1100
|
|
|
seq += r.get_resname().strip() |
|
1101
|
|
|
chains.append([c.id, seq, chain]) |
|
1102
|
|
|
|
|
1103
|
|
|
t = [] |
|
1104
|
|
|
#[['A', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']], ['B', 'CCGCCGCGCCAUGCCUGUGGCGG', ['C1', 'C2', 'G3', 'C4', 'C5', 'G6', 'C7', 'G8', 'C9', 'C10', 'A11', 'U12', 'G13', 'C14', 'C15', 'U16', 'G17', 'U18', 'G19', 'G20', 'C21', 'G22', 'G23']]] |
|
1105
|
|
|
for c in chains: |
|
1106
|
|
|
t.append('* ' + c[0] + ':' + c[2][0][1:] + '-' + c[2][-1][1:] + ' ' + c[1]) |
|
1107
|
|
|
for r in c[2]: |
|
1108
|
|
|
t.append('** ' + c[0] + ':' + r) |
|
1109
|
|
|
print('\n'.join(t)) |
|
1110
|
|
|
|
|
1111
|
|
|
if args.fix: |
|
1112
|
|
|
# quick fix - make a list on the spot |
|
1113
|
|
|
if list != type(args.file): |
|
1114
|
|
|
args.file = [args.file] |
|
1115
|
|
|
################################## |
|
1116
|
|
|
for f in args.file: |
|
1117
|
|
|
cmd = 'pdbfixer ' + f + ' --add-atoms all --add-residues' |
|
1118
|
|
|
print(cmd) |
|
1119
|
|
|
os.system(cmd) |
|
1120
|
|
|
if args.inplace: |
|
1121
|
|
|
shutil.move("output.pdb", f) |
|
1122
|
|
|
else: |
|
1123
|
|
|
shutil.move("output.pdb", f.replace('.pdb', '_fx.pdb')) |
|
1124
|
|
|
|
|
1125
|
|
|
if args.to_mol2: |
|
1126
|
|
|
# quick fix - make a list on the spot |
|
1127
|
|
|
if list != type(args.file): |
|
1128
|
|
|
args.file = [args.file] |
|
1129
|
|
|
################################## |
|
1130
|
|
|
for f in args.file: |
|
1131
|
|
|
cmd = 'obabel -i pdb ' + f + ' -o mol2 -O ' + f.replace('.pdb', '.mol2') |
|
1132
|
|
|
print(cmd) |
|
1133
|
|
|
os.system(cmd) |
|
1134
|
|
|
|
|
1135
|
|
|
|
|
1136
|
|
|
if args.nmr_dir: |
|
1137
|
|
|
files = sort_strings(glob.glob(args.nmr_dir + '/' + args.file)) |
|
1138
|
|
|
|
|
1139
|
|
|
c = 1 |
|
1140
|
|
|
|
|
1141
|
|
|
for f in files: |
|
1142
|
|
|
#s = RNAStructure(f) |
|
1143
|
|
|
print("MODEL " + str(c)) |
|
1144
|
|
|
# at some point I could use RNAStructure for this |
|
1145
|
|
|
print(open(f).read()) |
|
1146
|
|
|
#print(s.get_text(add_end=False)) |
|
1147
|
|
|
print('ENDMDL') |
|
1148
|
|
|
c += 1 |
|
1149
|
|
|
print('END') |
|
1150
|
|
|
|
|
1151
|
|
|
if args.set_chain: |
|
1152
|
|
|
if list != type(args.file): |
|
1153
|
|
|
args.file = [args.file] |
|
1154
|
|
|
|
|
1155
|
|
|
for f in args.file: |
|
1156
|
|
|
txt = set_chain_for_struc(f, args.set_chain) |
|
1157
|
|
|
if args.inplace: |
|
1158
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
|
1159
|
|
|
with open(f, 'w') as fi: |
|
1160
|
|
|
fi.write(txt) |
|
1161
|
|
|
else: |
|
1162
|
|
|
print(txt) |
|
1163
|
|
|
|
|
1164
|
|
|
if args.remove0: |
|
1165
|
|
|
if list != type(args.file): |
|
1166
|
|
|
args.file = [args.file] |
|
1167
|
|
|
|
|
1168
|
|
|
for f in args.file: |
|
1169
|
|
|
s = RNAStructure(f) |
|
1170
|
|
|
txt = '' |
|
1171
|
|
|
for l in s.lines: |
|
1172
|
|
|
if s.get_atom_coords(l) == (0.0, 0.0, 0.0): |
|
1173
|
|
|
continue |
|
1174
|
|
|
else: |
|
1175
|
|
|
txt += l + '\n' |
|
1176
|
|
|
|
|
1177
|
|
|
if args.inplace: |
|
1178
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
|
1179
|
|
|
with open(f, 'w') as fi: |
|
1180
|
|
|
fi.write(txt) |
|
1181
|
|
|
else: |
|
1182
|
|
|
print(txt) |
|
1183
|
|
|
|
|
1184
|
|
|
if args.replace_htm: |
|
1185
|
|
|
if list != type(args.file): |
|
1186
|
|
|
args.file = [args.file] |
|
1187
|
|
|
|
|
1188
|
|
|
for f in args.file: |
|
1189
|
|
|
with open(f) as fn: |
|
1190
|
|
|
txt = fn.read() |
|
1191
|
|
|
txt = txt.replace('HETATM', 'ATOM ') |
|
1192
|
|
|
if args.inplace: |
|
1193
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
|
1194
|
|
|
with open(f, 'w') as fi: |
|
1195
|
|
|
fi.write(txt) |
|
1196
|
|
|
else: |
|
1197
|
|
|
print(txt) |
|
1198
|
|
|
|
|
1199
|
|
|
if args.rgyration: |
|
1200
|
|
|
if list != type(args.file): |
|
1201
|
|
|
args.file = [args.file] |
|
1202
|
|
|
# quick fix - make a list on the spot |
|
1203
|
|
|
if list != type(args.file): |
|
1204
|
|
|
args.file = [args.file] |
|
1205
|
|
|
################################## |
|
1206
|
|
|
analyzed = [] |
|
1207
|
|
|
for f in args.file: |
|
1208
|
|
|
##################################### |
|
1209
|
|
|
if args.uniq: |
|
1210
|
|
|
subname = eval('f' + args.uniq) |
|
1211
|
|
|
if subname in analyzed: |
|
1212
|
|
|
continue |
|
1213
|
|
|
else: |
|
1214
|
|
|
analyzed.append(subname) |
|
1215
|
|
|
|
|
1216
|
|
|
s = RNAStructure(f) |
|
1217
|
|
|
if not s.is_pdb(): |
|
1218
|
|
|
print('Error: Not a PDB file %s' % f) |
|
1219
|
|
|
sys.exit(1) |
|
1220
|
|
|
s.decap_gtp() |
|
1221
|
|
|
s.std_resn() |
|
1222
|
|
|
s.remove_hydrogen() |
|
1223
|
|
|
s.remove_ion() |
|
1224
|
|
|
s.remove_water() |
|
1225
|
|
|
if args.renum_atoms: |
|
1226
|
|
|
s.renum_atoms() |
|
1227
|
|
|
s.fix_O_in_UC() |
|
1228
|
|
|
s.fix_op_atoms() |
|
1229
|
|
|
|
|
1230
|
|
|
output = '' |
|
1231
|
|
|
|
|
1232
|
|
|
# with # is easier to grep this out |
|
1233
|
|
|
if args.fasta: |
|
1234
|
|
|
# s.fn vs s.name |
|
1235
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, fasta=args.fasta, addfn=s.name, color=args.color_seq) + '\n' |
|
1236
|
|
|
elif args.oneline: |
|
1237
|
|
|
output += s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq).strip() + ' # '+ os.path.basename(f.replace('.pdb', '')) + '\n' |
|
1238
|
|
|
else: |
|
1239
|
|
|
N = len(s.get_seq(compact=args.compact, chainfirst=args.chain_first, color=args.color_seq)) |
|
1240
|
|
|
output += '' + os.path.basename(f.replace('.pdb', '')).ljust(60) + ' seq len: ' + str(N) + ' ' |
|
1241
|
|
|
try: |
|
1242
|
|
|
sys.stdout.write(output) |
|
1243
|
|
|
sys.stdout.flush() |
|
1244
|
|
|
except IOError: |
|
1245
|
|
|
pass |
|
1246
|
|
|
|
|
1247
|
|
|
import math |
|
1248
|
|
|
def calc_rg(): |
|
1249
|
|
|
"""modified after https://github.com/sarisabban/Rg""" |
|
1250
|
|
|
coord = list() |
|
1251
|
|
|
mass = list() |
|
1252
|
|
|
for line in s.lines: |
|
1253
|
|
|
try: |
|
1254
|
|
|
line = line.split() |
|
1255
|
|
|
x = float(line[6]) |
|
1256
|
|
|
y = float(line[7]) |
|
1257
|
|
|
z = float(line[8]) |
|
1258
|
|
|
coord.append([x, y, z]) |
|
1259
|
|
|
atom = line[2][0] |
|
1260
|
|
|
if atom == 'C': |
|
1261
|
|
|
mass.append(12.0107) |
|
1262
|
|
|
elif atom == 'O': |
|
1263
|
|
|
mass.append(15.9994) |
|
1264
|
|
|
elif atom == 'N': |
|
1265
|
|
|
mass.append(14.0067) |
|
1266
|
|
|
elif atom == 'S': |
|
1267
|
|
|
mass.append(32.065) |
|
1268
|
|
|
elif atom == 'P': |
|
1269
|
|
|
mass.append(30.97) |
|
1270
|
|
|
except: |
|
1271
|
|
|
pass |
|
1272
|
|
|
|
|
1273
|
|
|
xm = [(m*i, m*j, m*k) for (i, j, k), m in zip(coord, mass)] |
|
1274
|
|
|
tmass = sum(mass) |
|
1275
|
|
|
rr = sum(mi*i + mj*j + mk*k for (i, j, k), (mi, mj, mk) in zip(coord, xm)) |
|
1276
|
|
|
mm = sum((sum(i) / tmass)**2 for i in zip(*xm)) |
|
1277
|
|
|
rg = math.sqrt(rr / tmass-mm) |
|
1278
|
|
|
return(round(rg, 3)) |
|
1279
|
|
|
|
|
1280
|
|
|
def radius_of_gyration(N): |
|
1281
|
|
|
return round(5.5 * (N ** (1/3)), 2) |
|
1282
|
|
|
|
|
1283
|
|
|
rg = calc_rg() |
|
1284
|
|
|
rgn = radius_of_gyration(N) |
|
|
|
|
|
|
1285
|
|
|
print(f'radius of gyration: {rg}; expected for this seqlen ({N}): {rgn}') |
|
1286
|
|
|
|
|
1287
|
|
|
if args.renum_nmr: |
|
1288
|
|
|
if list != type(args.file): |
|
1289
|
|
|
args.file = [args.file] |
|
1290
|
|
|
txt = '' |
|
1291
|
|
|
for f in args.file: |
|
1292
|
|
|
c = 1 |
|
1293
|
|
|
for l in open(f): |
|
1294
|
|
|
if l.startswith('MODEL'): |
|
1295
|
|
|
txt += "MODEL " + str(c) + '\n' |
|
1296
|
|
|
c += 1 |
|
1297
|
|
|
elif l.strip() == 'END': |
|
1298
|
|
|
pass |
|
1299
|
|
|
else: |
|
1300
|
|
|
txt += l |
|
1301
|
|
|
if args.inplace: |
|
1302
|
|
|
shutil.move(f, f.replace('.pdb', '.pdb~')) |
|
|
|
|
|
|
1303
|
|
|
with open(f) as fi: |
|
1304
|
|
|
fi.write(txt) |
|
1305
|
|
|
else: |
|
1306
|
|
|
print(txt) |
|
1307
|
|
|
|
|
1308
|
|
|
from rna_tools.rna_tools_config import PYMOL_PATH |
|
1309
|
|
|
sys.path.insert(0, PYMOL_PATH) |
|
1310
|
|
View Code Duplication |
if args.cif2pdb: |
|
|
|
|
|
|
1311
|
|
|
# quick fix - make a list on the spot |
|
1312
|
|
|
if list != type(args.file): |
|
1313
|
|
|
args.file = [args.file] |
|
1314
|
|
|
################################## |
|
1315
|
|
|
for cif_file in args.file: |
|
1316
|
|
|
from Bio.PDB import MMCIFParser, PDBIO |
|
1317
|
|
|
parser = MMCIFParser() |
|
1318
|
|
|
structure = parser.get_structure("structure_id", cif_file) |
|
1319
|
|
|
pdb_file = cif_file.replace('.cif', '_fCIF.pdb') |
|
1320
|
|
|
|
|
1321
|
|
|
|
|
1322
|
|
|
try: |
|
1323
|
|
|
# Save to PDB format |
|
1324
|
|
|
io = PDBIO() |
|
1325
|
|
|
io.set_structure(structure) |
|
1326
|
|
|
io.save(pdb_file) |
|
1327
|
|
|
|
|
1328
|
|
|
print(f'saved: {pdb_file}') |
|
1329
|
|
|
# open a file add remarks |
|
1330
|
|
|
new_file = '' |
|
1331
|
|
|
with open(pdb_file, 'r') as f: |
|
1332
|
|
|
if not args.no_hr: |
|
1333
|
|
|
new_file += add_header(version) + '\n' |
|
1334
|
|
|
new_file += f.read() |
|
1335
|
|
|
|
|
1336
|
|
|
with open(pdb_file, 'w') as f: |
|
1337
|
|
|
f.write(new_file) |
|
1338
|
|
|
|
|
1339
|
|
|
except: |
|
1340
|
|
|
print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.') |
|
1341
|
|
|
|
|
1342
|
|
|
def has_high_rna_content(chain, threshold=0.8): |
|
1343
|
|
|
# RNA nucleotides: A, C, G, U, and X (you can modify as needed) |
|
1344
|
|
|
rna_nucleotides = ['A', 'C', 'G', 'U', 'X'] |
|
1345
|
|
|
total_residues = 0 |
|
1346
|
|
|
rna_residues = 0 |
|
1347
|
|
|
|
|
1348
|
|
|
# Count the total number of residues and RNA-like residues |
|
1349
|
|
|
for residue in chain: |
|
1350
|
|
|
total_residues += 1 |
|
1351
|
|
|
if residue.get_resname().strip() in rna_nucleotides: |
|
1352
|
|
|
rna_residues += 1 |
|
1353
|
|
|
|
|
1354
|
|
|
# Calculate the proportion of RNA residues |
|
1355
|
|
|
if total_residues == 0: |
|
1356
|
|
|
return False # Avoid division by zero if chain has no residues |
|
1357
|
|
|
|
|
1358
|
|
|
rna_percentage = rna_residues / total_residues |
|
1359
|
|
|
|
|
1360
|
|
|
# Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default) |
|
1361
|
|
|
return rna_percentage >= threshold |
|
1362
|
|
|
|
|
1363
|
|
|
from Bio.PDB.MMCIFParser import MMCIFParser |
|
1364
|
|
|
from Bio.PDB import MMCIFParser, Structure, Model, Chain |
|
1365
|
|
|
|
|
1366
|
|
|
# Initialize the parser |
|
1367
|
|
|
parser = MMCIFParser() |
|
1368
|
|
|
|
|
1369
|
|
|
# Parse the structure |
|
1370
|
|
|
structure = parser.get_structure("structure", cif_file) |
|
1371
|
|
|
|
|
1372
|
|
|
# Create a list of single-letter chain identifiers |
|
1373
|
|
|
import string |
|
1374
|
|
|
letters = list(string.ascii_uppercase) |
|
1375
|
|
|
|
|
1376
|
|
|
for model in structure: |
|
1377
|
|
|
for chain in model: |
|
1378
|
|
|
if has_high_rna_content(chain): |
|
1379
|
|
|
# New structure |
|
1380
|
|
|
new_structure = Structure.Structure("new_structure") |
|
1381
|
|
|
new_model = Model.Model(0) # Create a new model |
|
1382
|
|
|
new_structure.add(new_model) # Add the new model to the new structure |
|
1383
|
|
|
|
|
1384
|
|
|
chain_id_new = letters.pop(0) |
|
1385
|
|
|
chain_id = chain.get_id() |
|
1386
|
|
|
|
|
1387
|
|
|
atom_count = 0 |
|
1388
|
|
|
for residue in chain: |
|
1389
|
|
|
for atom in residue: |
|
1390
|
|
|
atom_count += 1 |
|
1391
|
|
|
|
|
1392
|
|
|
remarks = [] |
|
1393
|
|
|
remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}') |
|
1394
|
|
|
|
|
1395
|
|
|
pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') |
|
1396
|
|
|
print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}') |
|
1397
|
|
|
|
|
1398
|
|
|
chain.id = chain_id_new |
|
1399
|
|
|
new_model.add(chain) |
|
1400
|
|
|
|
|
1401
|
|
|
io = PDBIO() |
|
1402
|
|
|
io.set_structure(new_structure) |
|
1403
|
|
|
|
|
1404
|
|
|
io.save(pdb_file) |
|
1405
|
|
|
# open a file add remarks |
|
1406
|
|
|
new_file = '' |
|
1407
|
|
|
with open(pdb_file, 'r') as f: |
|
1408
|
|
|
if not args.no_hr: |
|
1409
|
|
|
new_file += add_header(version) + '\n' |
|
1410
|
|
|
if remarks: |
|
1411
|
|
|
new_file += '\n'.join(remarks) + '\n' |
|
1412
|
|
|
new_file += f.read() |
|
1413
|
|
|
|
|
1414
|
|
|
with open(pdb_file, 'w') as f: |
|
1415
|
|
|
f.write(new_file) |
|
1416
|
|
|
|
|
1417
|
|
|
if args.pdb2cif: |
|
1418
|
|
|
try: |
|
1419
|
|
|
from pymol import cmd |
|
1420
|
|
|
except ImportError: |
|
1421
|
|
|
print('This functionality needs PyMOL. Install it or if installed, check your setup') |
|
1422
|
|
|
sys.exit(1) |
|
1423
|
|
|
|
|
1424
|
|
|
# quick fix - make a list on the spot |
|
1425
|
|
|
if list != type(args.file): |
|
1426
|
|
|
args.file = [args.file] |
|
1427
|
|
|
################################## |
|
1428
|
|
|
for f in args.file: |
|
1429
|
|
|
cmd.load(f) |
|
1430
|
|
|
fo = f.replace('.pdb', '.cif') |
|
1431
|
|
|
cmd.save(fo, '(all)') |
|
1432
|
|
|
cmd.delete('all') |
|
1433
|
|
|
print(fo, 'saved') |
|
1434
|
|
|
|