build.rna_tools.tools.md.rnakb_utils.set_res_code() - Code Metrics - mmagnus/rna-tools - Measure and Improve Code Quality continuously with Scrutinizer

set_res_code() A
last analyzed 2025-07-29 00:29 UTC

↳ Parent: build.rna_tools.tools.md.rnakb_utils

Complexity

Conditions

Size

Total Lines	4
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	2
dl	0
loc	4
rs	10
c	0
b	0
f	0

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""RNAkb (previous Gromacs) utils.

A module with different functions needed for Gromacs/RNAkb merriage.

Marcin Magnus
Albert Bogdanowicz

(1) prepare groups and then (2) mdp score file.
"""

import re
import os
import argparse
import rna_tools.tools.pdb_formatix.PDBFile as pf

LIB_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + os.sep
VERBOSE = False
MDP_TEMPLATE = 'data/score_rnakb_orig.mdp'

GROMACS_ALLOWED_5 = ("H5T", "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'",
        "O4'", "C1'", "H1'", "N9", "C8", "H8", "N7", "C5", "C6", "N6", "H61",
        "H62", "N1", "C2", "H2", "N3", "C4", "C3'", "H3'", "C2'", "H2'1",
        "O2'", "HO'2", "O3'",
        # new atoms
        "O2", "O4", "O6", "N2", "N4",
        'OP1', 'OP2', 'OP3', 'H21', 'H22', "H2'", "H5'", "HO5'", "H5''", "HO2'",
        'P', 'H3', 'H1', 'H6', 'H5', 'H42', 'H41',
        )

GROMACS_ALLOWED_MIDDLE = ("P", "O1P", "O2P", "O5'", "C5'", "H5'1", "H5'2",
       "C4'", "H4'", "O4'", "C1'", "H1'", "N9", "C8", "H8", "N7", "C5", "C6",
        "N6", "H61", "H62", "N1", "C2", "H2", "N3", "C4", "C3'", "H3'", "C2'",
        "H2'1", "O2'", "HO'2", "O3'",
        # new atoms
        "O2", "O4", "O6", "N2", "N4",
        'OP1', 'OP2', 'OP3', 'H21', 'H22', 'O6', "H2'", "H5'", "HO5'", "H5''", "HO2'",
        'P', 'H3', 'H1', 'H6', 'H5', 'H42', 'H41',
        )

GROMACS_REQUIRED_GROUPS = ("aP", "aC4s", "aC2", "aC4", "aC6",
                           "uP", "uC4s", "uC2", "uC4", "uC6",
                           "gP", "gC4s", "gC2", "gC4", "gC6",
                           "cP", "cC4s", "cC2", "cC4", "cC6",
                           "RNA_5pt", "other"
        )


def get_res_num(line):
    """Extract residue number from a line of PDB file

    Arguments:
      * line = ATOM line from a PDB file

    Output:
      * residue number as an integer
    """
    return int(''.join([x for x in line[22:27] if x.isdigit()]))


def get_res_code(line):
    """Get residue code from a line of a PDB file
    """
    return line[17:20]

def set_res_code(line, code):
    """Set residue code from a line of a PDB file
    """
    return line[:17] + code.rjust(3) + line[20:]

def make_rna_gromacs_ready(pdb_string, verbose=VERBOSE):
    """GROMACS has some special requirements for PDB files.

    Arguments:
      * pdb_string = contents of PDB file as a string

    Output:
      * new PDB returned as a string

    (!!!) # hmm... [ RA5 ] will not be detected based on it (!?)
    Hmm.. becase it detects if the structure is already prepared.
    """
    #pdb_string = resname_3to1(pdb_string)
    #pdb_string = remove_hetatms(pdb_string)
    result = []
    pdb_lines = pdb_string.split('\n')

    # find smallest residue number # ugly this does not work for 2 chains
    min_res = min(list(map(get_res_num,
        [l for l in pdb_lines if l.startswith('ATOM')])))
    max_res = max(list(map(get_res_num,
        [l for l in pdb_lines if l.startswith('ATOM')])))

    for l in pdb_lines:
        if l.startswith('ATOM') and l[17:20].strip() in ('A', 'U', 'C', 'G'): # hmm... [ RA5 ] will not be detected based on it (!?)
            res = get_res_code(l)        
            #if res.startswith('R') and res.endswith('5') : # it's RX5 file so skip fixing
            #    if verbose:
            #        print('-- already gromacs ready')
            #    return pdb_string

            if l.startswith('TER'):
                continue

            l = l.replace('*', '\'')
            l = l.replace('O1P', 'OP1')
            l = l.replace('O2P', 'OP2')

            res_num = get_res_num(l)
            atom_type = l.split()[2].strip()

            # remove P OP1 OP2
            if res_num == min_res and atom_type == 'P':
                continue
            if res_num == min_res and atom_type == 'OP1':
                continue
            if res_num == min_res and atom_type == 'OP2':
                continue

            # convert G -> RG5, RG3
            # do this only if res does not start with R
            cres = get_res_code(l).strip()
            # print(cres)
            # aaaaaaaaa
            if not cres.startswith('R'):
                if res_num == min_res: # RG5
                    l = set_res_code(l, 'R' + get_res_code(l).strip() + '5')
                elif res_num == max_res: # RG3
                    l = set_res_code(l, 'R' + get_res_code(l).strip() + '3')
                else:
                    l = set_res_code(l, ' R' + get_res_code(l).strip())

            if res_num == min_res:
                if atom_type in GROMACS_ALLOWED_5:
                    result.append(l)
                else:
                    print('Wrong start line: ', l, atom_type)
            else:
                if atom_type in GROMACS_ALLOWED_MIDDLE:
                    result.append(l)
                else:
                    print('Wrong middle line: ', l, atom_type)
        else:
            result.append(l)
    return '\n'.join(result)

def make_rna_rnakb_ready(pdb_string, verbose=VERBOSE):
    """RNAkb read (difference between this function and 
    make_rna_gromacs_ready is ignoring R5U etc. RNAkb does not treat
    them differently so there is no point to distinguish them.

    Arguments:
      * pdb_string = contents of PDB file as a string

    Output:
      * new PDB returned as a string
    """
    #pdb_string = resname_3to1(pdb_string)
    #pdb_string = remove_hetatms(pdb_string)
    result = []
    pdb_lines = pdb_string.split('\n')

    # find smallest residue number
    min_res = min(list(map(get_res_num,
        [l for l in pdb_lines if l.startswith('ATOM')])))
    max_res = max(list(map(get_res_num,
        [l for l in pdb_lines if l.startswith('ATOM')])))

    for l in pdb_lines:
        if l.startswith('ATOM'):# and l[19] in ('A', 'U', 'C', 'G'):
            res = get_res_code(l)
            #if res.startswith('R') and res.endswith('5') : # it's RX5 file so skip fixing
            #    if verbose:
            #        print '-- already gromacs ready'
            #    return pdb_string

            l = l.replace('*', '\'')
            l = l.replace('O1P', 'OP1')
            l = l.replace('O2P', 'OP2')

            res_num = get_res_num(l)
            atom_type = l.split()[2].strip()
            # remove P OP1 OP2
            if res_num == min_res and atom_type == 'P':
                continue
            if res_num == min_res and atom_type == 'OP1':
                continue
            if res_num == min_res and atom_type == 'OP2':
                continue

            # convert G -> RG5, RG3
            #if res_num == min_res: # RG5
            #    l = set_res_code(l, 'R' + get_res_code(l).strip() + '5')
            #elif res_num == max_res: # RG3
            #    l = set_res_code(l, 'R' + get_res_code(l).strip() + '3')
            #else:
            l = set_res_code(l, ' R' + get_res_code(l).strip().replace('R','').replace('3','').replace('5','')) # 
            if res_num == min_res:
                if atom_type in GROMACS_ALLOWED_5:
                    result.append(l)
                else:
                    print(('Wrong start line: ', l, atom_type))
            else:
                if atom_type in GROMACS_ALLOWED_MIDDLE:
                    result.append(l)
                else:
                    print(('Wrong middle line: ', l, atom_type))
        else: # keep TER, etc.
            result.append(l)
    return '\n'.join(result)

def fix_gromacs_gro(path, verbose=False):
    """It's probably a bug in GROMACS, but box coordinates in gro files are
    not always separated by spaces. This function guesses how it should be
    separated and inserts spaces.

    Arguments:
      * path = path to gro file

    Output:
      * file is overwritten with a corrected one
    """
    f = open(path)
    gro = f.read()
    f.close()
    gro_lines = gro.split('\n')
    last_line = gro_lines[-2]

    # check if there are a space
    if last_line.find(' ') == -1:
        dots = [i.start() for i in re.finditer('\\.', last_line)]
        # next 4 lines are a guess, I hope it works
        digits = len(last_line[dots[2]:])
        box = [last_line[:dots[0] + digits],
            last_line[dots[0] + digits:dots[1] + digits], last_line[dots[1] + digits:]]
        gro_lines = gro_lines[:-2]
        gro_lines.append(' '.join(box))
        gro_lines.append('')
        f = open(path, 'w')
        f.write('\n'.join(gro_lines))
        f.close()


def fix_gromacs_ndx(path):
    """Sometimes, GROMACS index has some atoms in more than one group, or
    doesn't have all the groups grompp requires. This function fixes that.

    Arguments:
      * path = path to index file

    Output:
      * index is overwritten with a corrected one
    """
    f = open(path)
    index = f.read()
    f.close()
    # split file into system group and the rest
    system_group = index.split('[ System ]')[1].split('[')[0]
    other_groups = ['[' + i for i in
            index.split('[ System ]')[1].split('[')[1:]]
    # remove duplicate numbers
    taken_atoms = []
    for g in range(len(other_groups)):
        header = other_groups[g].split('\n')[0]
        group = other_groups[g].split('\n')[1].split()
        group = [a for a in group if a not in taken_atoms]
        taken_atoms.extend(group)
        other_groups[g] = header + '\n' + ' '.join(group) + '\n'
    # build result, part 1
    result = ['[ System ]' + system_group]
    result.extend(other_groups)
    # add missing groups, leave them empty
    headers = [g.split('\n')[0][2:-2] for g in other_groups]
    missing_headers = [h for h in GROMACS_REQUIRED_GROUPS if h not in headers]
    result.extend(['[ %s ]\n' % h for h in missing_headers])
    # write result to file
    result = ''.join(result)
    f = open(path, 'w')
    f.write(result)
    f.close()


def prepare_groups(fn, gr_fn, potential='aa', verbose=False):
    """ Prepare an index for fn file. gr_fn is a file where gtxt is saved in.

    Get seq and uniq & sort it.
    ``['RG5', 'RA', 'RA', 'RA', 'RG', 'RU', 'RA', 'RA', 'RC3'] set(['RU', 'RG', 'RC3', 'RG5', 'RA'])``

    @todo RG5 etc -- done!

    gtxt::

     del 1
     r RU & a C1'
     name 1 uC1s
     r RU & a C2
     name 2 uC2
     r RU & a C2'
     name 3 uC2s
     ...

    return, gtxt (groups_txt), energygrps . The result is saved to g_fn.
    energygrps:
    ['uP', 'uC4s', 'uC2', 'uC4', 'uC6', 'gP', 'gC4s', 'gC2', 'gC4', 'gC6', 'aP', 'aC4s', 'aC2', 'aC4', 'aC6']
    gtxt:
    RA
    del 1
    r RU & a P
    name 1 uP
    r RU & a C4s
    name 2 uC4s
    r RU & a C2
    name 3 uC2
    r RU & a C4
    [..]
    r RA & a C6
    name 15 aC6
    1|2|3|4|5|6|7|8|9|10|11|12|13|14|15
    name 16 RNA_5pt
    0 & ! 16
    name 17 other
    q
    """
    p = pf.PDBFile(pdb_path = fn) 
    seq = p.seq_from_amber_like_pdb().split()
    seq_uniq_sorted = set(seq)
    if verbose: print(('seq:', seq, seq_uniq_sorted))
    seq_rnakb_order = []
    if 'RA' in seq_uniq_sorted:
        seq_rnakb_order.append('RA')
    if 'RU' in seq_uniq_sorted:
        seq_rnakb_order.append('RU')
    if 'RG' in seq_uniq_sorted:
        seq_rnakb_order.append('RG')
    if 'RC' in seq_uniq_sorted:
        seq_rnakb_order.append('RC')
    if verbose:print(('seq_rnakb_order', seq_rnakb_order))
    
    gtxt = 'del 1\n'
    c = 1
    if potential == 'aa':
        # rg
        rg_atoms = "C3',C5,C4,C6,C8,O2',P,C2',O5',C5',C1',O3',O6,N2,N3,N1,N7,N9,C2,C4',O4',OP2,OP1".split(',')
        rg_atoms2 = ['g' + a.strip().replace("'", 's') for a in rg_atoms]
        # ru
        ug_atoms =  "C1',C2,C2',C3',C4,C4',C5,C5',C6,N1,N3,O2,O2',O3',O4,O4',O5',OP1,OP1,P".split(",")
        ug_atoms2 = ['u' + a.strip().replace("'", 's') for a in ug_atoms]
        # ag
        ag_atoms = "O3',O2',N7,N1,N3,N9,C2',O5',N6,C5',C1',C2,C6,C5,C4,O4',C4',C8,C3',P,OP1,OP2".split(',')
        ag_atoms2 = ['a' + a.strip().replace("'", 's') for a in ag_atoms]
        # cg
        cg_atoms = "C2',O2',C5',O5',C4,O2,C3',C2,O3',N4,N3,N1,P,C1',O4',C4',C5,C6,OP1,OP2".split(',')
        cg_atoms2 = ['c' + a.strip().replace("'", 's') for a in cg_atoms]
        #N1, N3, O4', C5', O3', C2', C4, C1', O5', O1P, C4', C6, C5, C2, C3', P, O2P, O2, O4, O2'
        if verbose: print(('len-s:', len(rg_atoms), len(cg_atoms), len(ag_atoms), len(ug_atoms)))
    elif potential == '5pt':
        # rg
        rg_atoms = ["P", "C4'", "C2", "C4", "C6"]
        ug_atoms = ["P", "C4'", "C2", "C4", "C6"]
        ag_atoms = ["P", "C4'", "C2", "C4", "C6"]                
        cg_atoms = ["P", "C4'", "C2", "C4", "C6"]                
        rg_atoms2 = ['g' + a.strip().replace("'", 's') for a in rg_atoms]
        ug_atoms2 = ['u' + a.strip().replace("'", 's') for a in ug_atoms]
        ag_atoms2 = ['a' + a.strip().replace("'", 's') for a in ag_atoms]
        cg_atoms2 = ['c' + a.strip().replace("'", 's') for a in cg_atoms]
    else:
        raise Exception('Unknown potential, use all or 5pt')

    energygrps = []

    for r in seq_rnakb_order:
        if r == 'RA':
            for x, y in zip(ag_atoms, ag_atoms2):
                gtxt += 'r RA & a %s\n' % x
                gtxt += 'name %i %s\n' % (c, y)
                c += 1
            energygrps.extend(ag_atoms2)

        if r == 'RU':
            for x, y in zip(ug_atoms, ug_atoms2):
                gtxt += 'r RU & a %s\n' % x
                gtxt += 'name %i %s\n' % (c, y)
                c += 1
            energygrps.extend(ug_atoms2)

        if r == 'RG':
            for x, y in zip(rg_atoms, rg_atoms2):
                gtxt += 'r RG & a %s\n' % x
                gtxt += 'name %i %s\n' % (c, y)
                c += 1
            energygrps.extend(rg_atoms2)

        if r == 'RC':
            for x, y in zip(cg_atoms, cg_atoms2):
                gtxt += 'r RC & a %s\n' % x
                gtxt += 'name %i %s\n' % (c, y)
                c += 1
            energygrps.extend(cg_atoms2)

    gtxt += '|'.join([str(x) for x in range(1, c)])
    gtxt += '\nname %i RNA_%s' % (c, potential) # @todo
    gtxt += '\n0 & ! %i' % (c)
    gtxt += '\nname %i other' % (c + 1)
    gtxt += '\nq\n'
    if verbose: print(gtxt)

    with open(gr_fn, 'w') as f:
        f.write(gtxt)

    if verbose: print(('energygrps', energygrps))
    return gtxt, energygrps, seq_rnakb_order


def format_score_mdp(mdp_out, energygrps, seq, verbose=False):
    """Get a template score mdp and replace energygrps
    (it can be generated with prepare_groups)
    and energygrp_table
    """
    # load template
    with open(LIB_PATH + 'md/' + MDP_TEMPLATE, 'r') as f:
        txt = f.readlines()

    with open(LIB_PATH + 'md/data/rnakb_all.txt', 'r') as f:
        pairs = [i.strip() for i in f.readlines()]

    nmdp = ''
    for l in txt:
        if l.startswith('energygrps'):
            l = 'energygrps               = ' + ' '.join(energygrps) + ' other'
            nmdp += l
        elif l.startswith('energygrp_table'):
            d = ''
            for x in energygrps:  # ugly :-(
                for y in energygrps:
                    s = '%s_%s' % (x, y) # Library file table_uP_aP.xvg not found in current dir nor in your GMXLIB path.
                    s_reverse = '%s_%s' % (y, x) # try: /home/mqapRNA/mqaprna_env/db/RNA_5pt_full_sc1/table_aP_uP.xvg
                    if s in pairs:
                        d += '%s ' % s.replace('_', ' ') # '_' -> ' '
            l = 'energygrp_table          = ' + d.strip()
            nmdp += l
        elif l.startswith('energygrp_excl'):
            l = 'energygrp_excl           =  other other ' + ' other '.join(energygrps) + ' other'
            nmdp += l
        else:
            nmdp += l
    if verbose: print(nmdp)

    with open(mdp_out, 'w') as f:
        f.write(nmdp)


def format_score_mdp_aa2(mdp_out, energygrps, seq, verbose=False):
    """Get a template score mdp and replace energygrps
    (it can be generated with prepare_groups)
    and energygrp_table
    """
    # load template
    with open(LIB_PATH + 'rnakb_utils/' + MDP_TEMPLATE, 'r') as f:
        txt = f.readlines()

    with open(LIB_PATH + 'rnakb_utils/data/rnakb_all.txt', 'r') as f:
        pairs = [i.strip() for i in f.readlines()]

    nmdp = ''
    for l in txt:
        if l.startswith('energygrps'):
            l = 'energygrps               = ' + ' '.join(energygrps) + ' other'
            nmdp += l
        elif l.startswith('energygrp_table'):
            d = ''
            c = 1
            used = ''
            for x in energygrps:  # ugly :-(
                for y in energygrps:
                    s = '%s_%s' % (x, y) # Library file table_uP_aP.xvg not found in current dir nor in your GMXLIB path.
                    if os.path.isfile('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full/table_%s.xvg' % s):
                        print(c, 'ok', s)
                    else:
                        s = '%s_%s' % (y, x) 
                        if os.path.isfile('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full/table_%s.xvg' % s):
                            print(c, 're', s)
                    print
                    # try: /home/mqapRNA/mqaprna_env/db/RNA_5pt_full_sc1/table_aP_uP.xvg
                    if s.replace('_', ' ') not in d:
                        d += '%s ' % s.replace('_', ' ') # '_' -> ' '
                        print(c, '!!', s)
                        c += 1   
                        
            l = 'energygrp_table          = ' + d.strip()
            print(len(l.split()))
            nmdp += l
        elif l.startswith('energygrp_excl'):
            l = 'energygrp_excl           =  other other ' + ' other '.join(energygrps) + ' other'
            nmdp += l
        else:
            nmdp += l
    if verbose: print(nmdp)

    with open(mdp_out, 'w') as f:
        f.write(nmdp)

def format_score_mdp_aa(mdp_out, energygrps, seq, potential, verbose=False):
    """Get a template score mdp and replace energygrps
    (it can be generated with prepare_groups)
    and energygrp_table
    """
    # load template
    with open(LIB_PATH + 'rnakb_utils/' + MDP_TEMPLATE, 'r') as f:
        txt = f.readlines()

    with open(LIB_PATH + 'rnakb_utils/data/rnakb_all.txt', 'r') as f:
        pairs = [i.strip() for i in f.readlines()]

    nmdp = ''
    c = 1
    for l in txt:
        if l.startswith('energygrps'):
            energygrps = []
            for i in os.listdir('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full'):
                if 'table_' in i:
                    a, b = i.replace('table_', '').replace('.xvg', '').replace('_', ' ').split()
                    if a not in energygrps:
                        energygrps.append(a)
                    if b not in energygrps:
                        energygrps.append(b)
            l = 'energygrps               = ' + ' '.join(energygrps) + ' other'
            nmdp += l
        elif l.startswith('energygrp_table'):
            groups = []
            for i in os.listdir('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full'):
                if 'table_' in i:
                    d = i.replace('table_', '').replace('.xvg', '').replace('_', ' ')
                    if verbose: print(i.ljust(20), d)
                    groups.append(d)
            nmdp += 'energygrp_table = ' + ' '.join(groups)
        elif l.startswith('energygrp_excl'):
            l = 'energygrp_excl           =  other other ' + ' other '.join(energygrps) + ' other'
            nmdp += l
        else:
            nmdp += l
    if verbose: print(nmdp)

    with open(mdp_out, 'w') as f:
        f.write(nmdp)


def get_parser():
        parser = argparse.ArgumentParser(
            description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)

        #parser.add_argument('-', "--", help="", default="")
        parser.add_argument("-v", "--verbose",
                            action="store_true", help="be verbose")
        parser.add_argument("file", help="", default="")
        return parser

# main
if __name__ == '__main__':
    #fn = 'test_data/decoy0165_amb_clx.pdb'
    fn = 'test_data/1duq.pdb'
    fn = 'test_data/1duq_rpr.pdb'
    # format_score_mdp('out.txt', '', '')
    # print(open('out.txt').read())
    from rna_tools.rna_tools_lib import RNAStructure
    r = RNAStructure(fn)
    for i in r.get_all_chain_ids():
        chain = r.get_chain(i)
        nchain = make_rna_gromacs_ready(chain)
        print(nchain).strip()


1			#!/usr/bin/env python
2			# -- coding: utf-8 --
3
4			"""RNAkb (previous Gromacs) utils.
5
6			A module with different functions needed for Gromacs/RNAkb merriage.
7
8			Marcin Magnus
9			Albert Bogdanowicz
10
11			(1) prepare groups and then (2) mdp score file.
12			"""
13
14			import re
15			import os
16			import argparse
17			import rna_tools.tools.pdb_formatix.PDBFile as pf
18
19			LIB_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + os.sep
20			VERBOSE = False
21			MDP_TEMPLATE = 'data/score_rnakb_orig.mdp'
22
23			GROMACS_ALLOWED_5 = ("H5T", "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'",
24			"O4'", "C1'", "H1'", "N9", "C8", "H8", "N7", "C5", "C6", "N6", "H61",
25			"H62", "N1", "C2", "H2", "N3", "C4", "C3'", "H3'", "C2'", "H2'1",
26			"O2'", "HO'2", "O3'",
27			# new atoms
28			"O2", "O4", "O6", "N2", "N4",
29			'OP1', 'OP2', 'OP3', 'H21', 'H22', "H2'", "H5'", "HO5'", "H5''", "HO2'",
30			'P', 'H3', 'H1', 'H6', 'H5', 'H42', 'H41',
31			)
32
33			GROMACS_ALLOWED_MIDDLE = ("P", "O1P", "O2P", "O5'", "C5'", "H5'1", "H5'2",
34			"C4'", "H4'", "O4'", "C1'", "H1'", "N9", "C8", "H8", "N7", "C5", "C6",
35			"N6", "H61", "H62", "N1", "C2", "H2", "N3", "C4", "C3'", "H3'", "C2'",
36			"H2'1", "O2'", "HO'2", "O3'",
37			# new atoms
38			"O2", "O4", "O6", "N2", "N4",
39			'OP1', 'OP2', 'OP3', 'H21', 'H22', 'O6', "H2'", "H5'", "HO5'", "H5''", "HO2'",
40			'P', 'H3', 'H1', 'H6', 'H5', 'H42', 'H41',
41			)
42
43			GROMACS_REQUIRED_GROUPS = ("aP", "aC4s", "aC2", "aC4", "aC6",
44			"uP", "uC4s", "uC2", "uC4", "uC6",
45			"gP", "gC4s", "gC2", "gC4", "gC6",
46			"cP", "cC4s", "cC2", "cC4", "cC6",
47			"RNA_5pt", "other"
48			)
49
50
51			def get_res_num(line):
52			"""Extract residue number from a line of PDB file
53
54			Arguments:
55			* line = ATOM line from a PDB file
56
57			Output:
58			* residue number as an integer
59			"""
60			return int(''.join([x for x in line[22:27] if x.isdigit()]))
61
62
63			def get_res_code(line):
64			"""Get residue code from a line of a PDB file
65			"""
66			return line[17:20]
67
68			def set_res_code(line, code):
69			"""Set residue code from a line of a PDB file
70			"""
71			return line[:17] + code.rjust(3) + line[20:]
72
73			def make_rna_gromacs_ready(pdb_string, verbose=VERBOSE):
74			"""GROMACS has some special requirements for PDB files.
75
76			Arguments:
77			* pdb_string = contents of PDB file as a string
78
79			Output:
80			* new PDB returned as a string
81
82			(!!!) # hmm... [ RA5 ] will not be detected based on it (!?)
83			Hmm.. becase it detects if the structure is already prepared.
84			"""
85			#pdb_string = resname_3to1(pdb_string)
86			#pdb_string = remove_hetatms(pdb_string)
87			result = []
88			pdb_lines = pdb_string.split('\n')
89
90			# find smallest residue number # ugly this does not work for 2 chains
91			min_res = min(list(map(get_res_num,
92			[l for l in pdb_lines if l.startswith('ATOM')])))
93			max_res = max(list(map(get_res_num,
94			[l for l in pdb_lines if l.startswith('ATOM')])))
95
96			for l in pdb_lines:
97			if l.startswith('ATOM') and l[17:20].strip() in ('A', 'U', 'C', 'G'): # hmm... [ RA5 ] will not be detected based on it (!?)
98			res = get_res_code(l)
99			#if res.startswith('R') and res.endswith('5') : # it's RX5 file so skip fixing
100			# if verbose:
101			# print('-- already gromacs ready')
102			# return pdb_string
103
104			if l.startswith('TER'):
105			continue
106
107			l = l.replace('*', '\'')
108			l = l.replace('O1P', 'OP1')
109			l = l.replace('O2P', 'OP2')
110
111			res_num = get_res_num(l)
112			atom_type = l.split()[2].strip()
113
114			# remove P OP1 OP2
115			if res_num == min_res and atom_type == 'P':
116			continue
117			if res_num == min_res and atom_type == 'OP1':
118			continue
119			if res_num == min_res and atom_type == 'OP2':
120			continue
121
122			# convert G -> RG5, RG3
123			# do this only if res does not start with R
124			cres = get_res_code(l).strip()
125			# print(cres)
126			# aaaaaaaaa
127			if not cres.startswith('R'):
128			if res_num == min_res: # RG5
129			l = set_res_code(l, 'R' + get_res_code(l).strip() + '5')
130			elif res_num == max_res: # RG3
131			l = set_res_code(l, 'R' + get_res_code(l).strip() + '3')
132			else:
133			l = set_res_code(l, ' R' + get_res_code(l).strip())
134
135			if res_num == min_res:
136			if atom_type in GROMACS_ALLOWED_5:
137			result.append(l)
138			else:
139			print('Wrong start line: ', l, atom_type)
140			else:
141			if atom_type in GROMACS_ALLOWED_MIDDLE:
142			result.append(l)
143			else:
144			print('Wrong middle line: ', l, atom_type)
145			else:
146			result.append(l)
147			return '\n'.join(result)
148
149			def make_rna_rnakb_ready(pdb_string, verbose=VERBOSE):
150			"""RNAkb read (difference between this function and
151			make_rna_gromacs_ready is ignoring R5U etc. RNAkb does not treat
152			them differently so there is no point to distinguish them.
153
154			Arguments:
155			* pdb_string = contents of PDB file as a string
156
157			Output:
158			* new PDB returned as a string
159			"""
160			#pdb_string = resname_3to1(pdb_string)
161			#pdb_string = remove_hetatms(pdb_string)
162			result = []
163			pdb_lines = pdb_string.split('\n')
164
165			# find smallest residue number
166			min_res = min(list(map(get_res_num,
167			[l for l in pdb_lines if l.startswith('ATOM')])))
168			max_res = max(list(map(get_res_num,
169			[l for l in pdb_lines if l.startswith('ATOM')])))
170
171			for l in pdb_lines:
172			if l.startswith('ATOM'):# and l[19] in ('A', 'U', 'C', 'G'):
173			res = get_res_code(l)
174			#if res.startswith('R') and res.endswith('5') : # it's RX5 file so skip fixing
175			# if verbose:
176			# print '-- already gromacs ready'
177			# return pdb_string
178
179			l = l.replace('*', '\'')
180			l = l.replace('O1P', 'OP1')
181			l = l.replace('O2P', 'OP2')
182
183			res_num = get_res_num(l)
184			atom_type = l.split()[2].strip()
185			# remove P OP1 OP2
186			if res_num == min_res and atom_type == 'P':
187			continue
188			if res_num == min_res and atom_type == 'OP1':
189			continue
190			if res_num == min_res and atom_type == 'OP2':
191			continue
192
193			# convert G -> RG5, RG3
194			#if res_num == min_res: # RG5
195			# l = set_res_code(l, 'R' + get_res_code(l).strip() + '5')
196			#elif res_num == max_res: # RG3
197			# l = set_res_code(l, 'R' + get_res_code(l).strip() + '3')
198			#else:
199			l = set_res_code(l, ' R' + get_res_code(l).strip().replace('R','').replace('3','').replace('5','')) #
200			if res_num == min_res:
201			if atom_type in GROMACS_ALLOWED_5:
202			result.append(l)
203			else:
204			print(('Wrong start line: ', l, atom_type))
205			else:
206			if atom_type in GROMACS_ALLOWED_MIDDLE:
207			result.append(l)
208			else:
209			print(('Wrong middle line: ', l, atom_type))
210			else: # keep TER, etc.
211			result.append(l)
212			return '\n'.join(result)
213
214			def fix_gromacs_gro(path, verbose=False):
215			"""It's probably a bug in GROMACS, but box coordinates in gro files are
216			not always separated by spaces. This function guesses how it should be
217			separated and inserts spaces.
218
219			Arguments:
220			* path = path to gro file
221
222			Output:
223			* file is overwritten with a corrected one
224			"""
225			f = open(path)
226			gro = f.read()
227			f.close()
228			gro_lines = gro.split('\n')
229			last_line = gro_lines[-2]
230
231			# check if there are a space
232			if last_line.find(' ') == -1:
233			dots = [i.start() for i in re.finditer('\\.', last_line)]
234			# next 4 lines are a guess, I hope it works
235			digits = len(last_line[dots[2]:])
236			box = [last_line[:dots[0] + digits],
237			last_line[dots[0] + digits:dots[1] + digits], last_line[dots[1] + digits:]]
238			gro_lines = gro_lines[:-2]
239			gro_lines.append(' '.join(box))
240			gro_lines.append('')
241			f = open(path, 'w')
242			f.write('\n'.join(gro_lines))
243			f.close()
244
245
246			def fix_gromacs_ndx(path):
247			"""Sometimes, GROMACS index has some atoms in more than one group, or
248			doesn't have all the groups grompp requires. This function fixes that.
249
250			Arguments:
251			* path = path to index file
252
253			Output:
254			* index is overwritten with a corrected one
255			"""
256			f = open(path)
257			index = f.read()
258			f.close()
259			# split file into system group and the rest
260			system_group = index.split('[ System ]')[1].split('[')[0]
261			other_groups = ['[' + i for i in
262			index.split('[ System ]')[1].split('[')[1:]]
263			# remove duplicate numbers
264			taken_atoms = []
265			for g in range(len(other_groups)):
266			header = other_groups[g].split('\n')[0]
267			group = other_groups[g].split('\n')[1].split()
268			group = [a for a in group if a not in taken_atoms]
269			taken_atoms.extend(group)
270			other_groups[g] = header + '\n' + ' '.join(group) + '\n'
271			# build result, part 1
272			result = ['[ System ]' + system_group]
273			result.extend(other_groups)
274			# add missing groups, leave them empty
275			headers = [g.split('\n')[0][2:-2] for g in other_groups]
276			missing_headers = [h for h in GROMACS_REQUIRED_GROUPS if h not in headers]
277			result.extend(['[ %s ]\n' % h for h in missing_headers])
278			# write result to file
279			result = ''.join(result)
280			f = open(path, 'w')
281			f.write(result)
282			f.close()
283
284
285			def prepare_groups(fn, gr_fn, potential='aa', verbose=False):
286			""" Prepare an index for fn file. gr_fn is a file where gtxt is saved in.
287
288			Get seq and uniq & sort it.
289			``['RG5', 'RA', 'RA', 'RA', 'RG', 'RU', 'RA', 'RA', 'RC3'] set(['RU', 'RG', 'RC3', 'RG5', 'RA'])``
290
291			@todo RG5 etc -- done!
292
293			gtxt::
294
295			del 1
296			r RU & a C1'
297			name 1 uC1s
298			r RU & a C2
299			name 2 uC2
300			r RU & a C2'
301			name 3 uC2s
302			...
303
304			return, gtxt (groups_txt), energygrps . The result is saved to g_fn.
305			energygrps:
306			['uP', 'uC4s', 'uC2', 'uC4', 'uC6', 'gP', 'gC4s', 'gC2', 'gC4', 'gC6', 'aP', 'aC4s', 'aC2', 'aC4', 'aC6']
307			gtxt:
308			RA
309			del 1
310			r RU & a P
311			name 1 uP
312			r RU & a C4s
313			name 2 uC4s
314			r RU & a C2
315			name 3 uC2
316			r RU & a C4
317			[..]
318			r RA & a C6
319			name 15 aC6
320			1\|2\|3\|4\|5\|6\|7\|8\|9\|10\|11\|12\|13\|14\|15
321			name 16 RNA_5pt
322			0 & ! 16
323			name 17 other
324			q
325			"""
326			p = pf.PDBFile(pdb_path = fn)
327			seq = p.seq_from_amber_like_pdb().split()
328			seq_uniq_sorted = set(seq)
329			if verbose: print(('seq:', seq, seq_uniq_sorted))
330			seq_rnakb_order = []
331			if 'RA' in seq_uniq_sorted:
332			seq_rnakb_order.append('RA')
333			if 'RU' in seq_uniq_sorted:
334			seq_rnakb_order.append('RU')
335			if 'RG' in seq_uniq_sorted:
336			seq_rnakb_order.append('RG')
337			if 'RC' in seq_uniq_sorted:
338			seq_rnakb_order.append('RC')
339			if verbose:print(('seq_rnakb_order', seq_rnakb_order))
340
341			gtxt = 'del 1\n'
342			c = 1
343			if potential == 'aa':
344			# rg
345			rg_atoms = "C3',C5,C4,C6,C8,O2',P,C2',O5',C5',C1',O3',O6,N2,N3,N1,N7,N9,C2,C4',O4',OP2,OP1".split(',')
346			rg_atoms2 = ['g' + a.strip().replace("'", 's') for a in rg_atoms]
347			# ru
348			ug_atoms = "C1',C2,C2',C3',C4,C4',C5,C5',C6,N1,N3,O2,O2',O3',O4,O4',O5',OP1,OP1,P".split(",")
349			ug_atoms2 = ['u' + a.strip().replace("'", 's') for a in ug_atoms]
350			# ag
351			ag_atoms = "O3',O2',N7,N1,N3,N9,C2',O5',N6,C5',C1',C2,C6,C5,C4,O4',C4',C8,C3',P,OP1,OP2".split(',')
352			ag_atoms2 = ['a' + a.strip().replace("'", 's') for a in ag_atoms]
353			# cg
354			cg_atoms = "C2',O2',C5',O5',C4,O2,C3',C2,O3',N4,N3,N1,P,C1',O4',C4',C5,C6,OP1,OP2".split(',')
355			cg_atoms2 = ['c' + a.strip().replace("'", 's') for a in cg_atoms]
356			#N1, N3, O4', C5', O3', C2', C4, C1', O5', O1P, C4', C6, C5, C2, C3', P, O2P, O2, O4, O2'
357			if verbose: print(('len-s:', len(rg_atoms), len(cg_atoms), len(ag_atoms), len(ug_atoms)))
358			elif potential == '5pt':
359			# rg
360			rg_atoms = ["P", "C4'", "C2", "C4", "C6"]
361			ug_atoms = ["P", "C4'", "C2", "C4", "C6"]
362			ag_atoms = ["P", "C4'", "C2", "C4", "C6"]
363			cg_atoms = ["P", "C4'", "C2", "C4", "C6"]
364			rg_atoms2 = ['g' + a.strip().replace("'", 's') for a in rg_atoms]
365			ug_atoms2 = ['u' + a.strip().replace("'", 's') for a in ug_atoms]
366			ag_atoms2 = ['a' + a.strip().replace("'", 's') for a in ag_atoms]
367			cg_atoms2 = ['c' + a.strip().replace("'", 's') for a in cg_atoms]
368			else:
369			raise Exception('Unknown potential, use all or 5pt')
370
371			energygrps = []
372
373			for r in seq_rnakb_order:
374			if r == 'RA':
375			for x, y in zip(ag_atoms, ag_atoms2):
376			gtxt += 'r RA & a %s\n' % x
377			gtxt += 'name %i %s\n' % (c, y)
378			c += 1
379			energygrps.extend(ag_atoms2)
380
381			if r == 'RU':
382			for x, y in zip(ug_atoms, ug_atoms2):
383			gtxt += 'r RU & a %s\n' % x
384			gtxt += 'name %i %s\n' % (c, y)
385			c += 1
386			energygrps.extend(ug_atoms2)
387
388			if r == 'RG':
389			for x, y in zip(rg_atoms, rg_atoms2):
390			gtxt += 'r RG & a %s\n' % x
391			gtxt += 'name %i %s\n' % (c, y)
392			c += 1
393			energygrps.extend(rg_atoms2)
394
395			if r == 'RC':
396			for x, y in zip(cg_atoms, cg_atoms2):
397			gtxt += 'r RC & a %s\n' % x
398			gtxt += 'name %i %s\n' % (c, y)
399			c += 1
400			energygrps.extend(cg_atoms2)
401
402			gtxt += '\|'.join([str(x) for x in range(1, c)])
403			gtxt += '\nname %i RNA_%s' % (c, potential) # @todo
404			gtxt += '\n0 & ! %i' % (c)
405			gtxt += '\nname %i other' % (c + 1)
406			gtxt += '\nq\n'
407			if verbose: print(gtxt)
408
409			with open(gr_fn, 'w') as f:
410			f.write(gtxt)
411
412			if verbose: print(('energygrps', energygrps))
413			return gtxt, energygrps, seq_rnakb_order
414
415
416			def format_score_mdp(mdp_out, energygrps, seq, verbose=False):
417			"""Get a template score mdp and replace energygrps
418			(it can be generated with prepare_groups)
419			and energygrp_table
420			"""
421			# load template
422			with open(LIB_PATH + 'md/' + MDP_TEMPLATE, 'r') as f:
423			txt = f.readlines()
424
425			with open(LIB_PATH + 'md/data/rnakb_all.txt', 'r') as f:
426			pairs = [i.strip() for i in f.readlines()]
427
428			nmdp = ''
429			for l in txt:
430			if l.startswith('energygrps'):
431			l = 'energygrps = ' + ' '.join(energygrps) + ' other'
432			nmdp += l
433			elif l.startswith('energygrp_table'):
434			d = ''
435			for x in energygrps: # ugly :-(
436			for y in energygrps:
437			s = '%s_%s' % (x, y) # Library file table_uP_aP.xvg not found in current dir nor in your GMXLIB path.
438			s_reverse = '%s_%s' % (y, x) # try: /home/mqapRNA/mqaprna_env/db/RNA_5pt_full_sc1/table_aP_uP.xvg
439			if s in pairs:
440			d += '%s ' % s.replace('_', ' ') # '_' -> ' '
441			l = 'energygrp_table = ' + d.strip()
442			nmdp += l
443			elif l.startswith('energygrp_excl'):
444			l = 'energygrp_excl = other other ' + ' other '.join(energygrps) + ' other'
445			nmdp += l
446			else:
447			nmdp += l
448			if verbose: print(nmdp)
449
450			with open(mdp_out, 'w') as f:
451			f.write(nmdp)
452
453
454			def format_score_mdp_aa2(mdp_out, energygrps, seq, verbose=False):
455			"""Get a template score mdp and replace energygrps
456			(it can be generated with prepare_groups)
457			and energygrp_table
458			"""
459			# load template
460			with open(LIB_PATH + 'rnakb_utils/' + MDP_TEMPLATE, 'r') as f:
461			txt = f.readlines()
462
463			with open(LIB_PATH + 'rnakb_utils/data/rnakb_all.txt', 'r') as f:
464			pairs = [i.strip() for i in f.readlines()]
465
466			nmdp = ''
467			for l in txt:
468			if l.startswith('energygrps'):
469			l = 'energygrps = ' + ' '.join(energygrps) + ' other'
470			nmdp += l
471			elif l.startswith('energygrp_table'):
472			d = ''
473			c = 1
474			used = ''
475			for x in energygrps: # ugly :-(
476			for y in energygrps:
477			s = '%s_%s' % (x, y) # Library file table_uP_aP.xvg not found in current dir nor in your GMXLIB path.
478			if os.path.isfile('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full/table_%s.xvg' % s):
479			print(c, 'ok', s)
480			else:
481			s = '%s_%s' % (y, x)
482			if os.path.isfile('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full/table_%s.xvg' % s):
483			print(c, 're', s)
484			print
485			# try: /home/mqapRNA/mqaprna_env/db/RNA_5pt_full_sc1/table_aP_uP.xvg
486			if s.replace('_', ' ') not in d:
487			d += '%s ' % s.replace('_', ' ') # '_' -> ' '
488			print(c, '!!', s)
489			c += 1
490
491			l = 'energygrp_table = ' + d.strip()
492			print(len(l.split()))
493			nmdp += l
494			elif l.startswith('energygrp_excl'):
495			l = 'energygrp_excl = other other ' + ' other '.join(energygrps) + ' other'
496			nmdp += l
497			else:
498			nmdp += l
499			if verbose: print(nmdp)
500
501			with open(mdp_out, 'w') as f:
502			f.write(nmdp)
503
504			def format_score_mdp_aa(mdp_out, energygrps, seq, potential, verbose=False):
505			"""Get a template score mdp and replace energygrps
506			(it can be generated with prepare_groups)
507			and energygrp_table
508			"""
509			# load template
510			with open(LIB_PATH + 'rnakb_utils/' + MDP_TEMPLATE, 'r') as f:
511			txt = f.readlines()
512
513			with open(LIB_PATH + 'rnakb_utils/data/rnakb_all.txt', 'r') as f:
514			pairs = [i.strip() for i in f.readlines()]
515
516			nmdp = ''
517			c = 1
518			for l in txt:
519			if l.startswith('energygrps'):
520			energygrps = []
521			for i in os.listdir('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full'):
522			if 'table_' in i:
523			a, b = i.replace('table_', '').replace('.xvg', '').replace('_', ' ').split()
524			if a not in energygrps:
525			energygrps.append(a)
526			if b not in energygrps:
527			energygrps.append(b)
528			l = 'energygrps = ' + ' '.join(energygrps) + ' other'
529			nmdp += l
530			elif l.startswith('energygrp_table'):
531			groups = []
532			for i in os.listdir('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full'):
533			if 'table_' in i:
534			d = i.replace('table_', '').replace('.xvg', '').replace('_', ' ')
535			if verbose: print(i.ljust(20), d)
536			groups.append(d)
537			nmdp += 'energygrp_table = ' + ' '.join(groups)
538			elif l.startswith('energygrp_excl'):
539			l = 'energygrp_excl = other other ' + ' other '.join(energygrps) + ' other'
540			nmdp += l
541			else:
542			nmdp += l
543			if verbose: print(nmdp)
544
545			with open(mdp_out, 'w') as f:
546			f.write(nmdp)
547
548
549			def get_parser():
550			parser = argparse.ArgumentParser(
551			description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
552
553			#parser.add_argument('-', "--", help="", default="")
554			parser.add_argument("-v", "--verbose",
555			action="store_true", help="be verbose")
556			parser.add_argument("file", help="", default="")
557			return parser
558
559			# main
560			if __name__ == '__main__':
561			#fn = 'test_data/decoy0165_amb_clx.pdb'
562			fn = 'test_data/1duq.pdb'
563			fn = 'test_data/1duq_rpr.pdb'
564			# format_score_mdp('out.txt', '', '')
565			# print(open('out.txt').read())
566			from rna_tools.rna_tools_lib import RNAStructure
567			r = RNAStructure(fn)
568			for i in r.get_all_chain_ids():
569			chain = r.get_chain(i)
570			nchain = make_rna_gromacs_ready(chain)
571			print(nchain).strip()
572

mmagnus / rna-tools

set_res_code() A last analyzed 2025-07-29 00:29 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

set_res_code() A
last analyzed 2025-07-29 00:29 UTC