build.rna_tools.rna_cif2pdb.get_parser() - Code Metrics - mmagnus/rna-tools - Measure and Improve Code Quality continuously with Scrutinizer

build.rna_tools.rna_cif2pdb.get_parser() A
last analyzed 2026-01-16 23:21 UTC

↳ Parent: build.rna_tools.rna_cif2pdb

Complexity

Conditions

Size

Total Lines	16
Code Lines	14

Duplication

Lines	16
Ratio	100 %

Importance

Changes

Metric	Value
cc	1
eloc	14
nop	0
dl	16
loc	16
rs	9.7
c	0
b	0
f	0

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

"""
from __future__ import print_function
import argparse
from icecream import ic
import sys
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
ic.configureOutput(prefix='> ')

from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version
import os


def convert_cif_to_pdb(cif_file, add_header_to_output=True, version='', verbose=True,
                       split_conflicting_chains=True, output_path=None):
    """Convert an mmCIF file into one or more PDB files.

    Returns a list of generated PDB filenames. When a direct conversion fails
    (e.g. multi-character chain ids) the chains are split into separate models
    following the legacy CLI behavior.
    """
    from Bio.PDB import MMCIFParser, PDBIO

    def _prepend_header(pdb_file, remarks=None):
        if not add_header_to_output and not remarks:
            return
        new_content = ''
        if add_header_to_output:
            new_content += add_header(version) + '\n'
        if remarks:
            new_content += '\n'.join(remarks) + '\n'
        with open(pdb_file, 'r') as fh:
            new_content += fh.read()
        with open(pdb_file, 'w') as fh:
            fh.write(new_content)

    if output_path and split_conflicting_chains:
        raise ValueError('output_path is only supported when split_conflicting_chains is False')

    outputs = []
    parser = MMCIFParser()
    structure = parser.get_structure("structure_id", cif_file)
    pdb_file = output_path or cif_file.replace('.cif', '_fCIF.pdb')

    io = PDBIO()
    io.set_structure(structure)
    try:
        io.save(pdb_file)
        _prepend_header(pdb_file)
        outputs.append(pdb_file)
        if verbose:
            print(f'saved: {pdb_file}')
        return outputs
    except Exception:
        if verbose:
            print('Warning: some of the chains in this mmCIF file have multi-character ids.')

    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB import Structure, Model, PDBIO

    def has_high_rna_content(chain, threshold=0.8):
        rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
        total_residues = 0
        rna_residues = 0
        for residue in chain:
            total_residues += 1
            if residue.get_resname().strip() in rna_nucleotides:
                rna_residues += 1
        if total_residues == 0:
            return False
        return (rna_residues / total_residues) >= threshold

    parser = MMCIFParser()
    structure = parser.get_structure("structure", cif_file)

    if not split_conflicting_chains:
        import string
        letters = list(string.ascii_uppercase + string.ascii_lowercase + string.digits)
        letter_iter = iter(letters)
        for model in structure:
            for chain in model:
                try:
                    chain.id = next(letter_iter)
                except StopIteration:
                    raise RuntimeError('Too many chains to assign unique single-letter IDs in %s' % cif_file)
        io = PDBIO()
        io.set_structure(structure)
        io.save(pdb_file)
        _prepend_header(pdb_file)
        outputs.append(pdb_file)
        if verbose:
            print(f'saved: {pdb_file} (renamed chain ids)')
        return outputs

    import string
    letters = list(string.ascii_uppercase)

    for model in structure:
        for chain in model:
            if has_high_rna_content(chain):
                new_structure = Structure.Structure("new_structure")
                new_model = Model.Model(0)
                new_structure.add(new_model)

                chain_id_new = letters.pop(0)
                chain_id = chain.get_id()

                atom_count = 0
                for residue in chain:
                    for atom in residue:
                        atom_count += 1

                remarks = [f'REMARK rna chain {chain.id} -> {chain_id_new}']
                pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
                if verbose:
                    print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

                chain.id = chain_id_new
                new_model.add(chain)

                io = PDBIO()
                io.set_structure(new_structure)
                io.save(pdb_file)
                _prepend_header(pdb_file, remarks)
                outputs.append(pdb_file)

    return outputs

def get_parser():

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)

    version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__)
    version = version[1].strip()
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--version', help='', action='version', version=version)
    parser.add_argument("-v", "--verbose",
                        action="store_true", help="be verbose")
    parser.add_argument('--no-hr', help='do not insert the header into files',
                        action='store_true')
    parser.add_argument("file", help="", default="", nargs='+')
    return parser, version


if __name__ == '__main__':
    parser, version = get_parser()
    args = parser.parse_args()

    if list != type(args.file):
        args.file = [args.file]

    for cif_file in args.file:
        convert_cif_to_pdb(
            cif_file,
            add_header_to_output=not args.no_hr,
            version=version,
            verbose=True,
        )


1		#!/usr/bin/env python
2		# -- coding: utf-8 --
3		"""
4
5		"""
6		from __future__ import print_function
7		import argparse
8		from icecream import ic
9		import sys
10		ic.configureOutput(outputFunction=lambda a: print(a, file=sys.stderr))
11		ic.configureOutput(prefix='> ')
12
13		from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version
14		import os
15
16
17		def convert_cif_to_pdb(cif_file, add_header_to_output=True, version='', verbose=True,
18		split_conflicting_chains=True, output_path=None):
19		"""Convert an mmCIF file into one or more PDB files.
20
21		Returns a list of generated PDB filenames. When a direct conversion fails
22		(e.g. multi-character chain ids) the chains are split into separate models
23		following the legacy CLI behavior.
24		"""
25		from Bio.PDB import MMCIFParser, PDBIO
26
27		def _prepend_header(pdb_file, remarks=None):
28		if not add_header_to_output and not remarks:
29		return
30		new_content = ''
31		if add_header_to_output:
32		new_content += add_header(version) + '\n'
33		if remarks:
34		new_content += '\n'.join(remarks) + '\n'
35		with open(pdb_file, 'r') as fh:
36		new_content += fh.read()
37		with open(pdb_file, 'w') as fh:
38		fh.write(new_content)
39
40		if output_path and split_conflicting_chains:
41		raise ValueError('output_path is only supported when split_conflicting_chains is False')
42
43		outputs = []
44		parser = MMCIFParser()
45		structure = parser.get_structure("structure_id", cif_file)
46		pdb_file = output_path or cif_file.replace('.cif', '_fCIF.pdb')
47
48		io = PDBIO()
49		io.set_structure(structure)
50		try:
51		io.save(pdb_file)
52		_prepend_header(pdb_file)
53		outputs.append(pdb_file)
54		if verbose:
55		print(f'saved: {pdb_file}')
56		return outputs
57		except Exception:
58		if verbose:
59		print('Warning: some of the chains in this mmCIF file have multi-character ids.')
60
61		from Bio.PDB.MMCIFParser import MMCIFParser
62		from Bio.PDB import Structure, Model, PDBIO
63
64		def has_high_rna_content(chain, threshold=0.8):
65		rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
66		total_residues = 0
67		rna_residues = 0
68		for residue in chain:
69		total_residues += 1
70		if residue.get_resname().strip() in rna_nucleotides:
71		rna_residues += 1
72		if total_residues == 0:
73		return False
74		return (rna_residues / total_residues) >= threshold
75
76		parser = MMCIFParser()
77		structure = parser.get_structure("structure", cif_file)
78
79		if not split_conflicting_chains:
80		import string
81		letters = list(string.ascii_uppercase + string.ascii_lowercase + string.digits)
82		letter_iter = iter(letters)
83		for model in structure:
84		for chain in model:
85		try:
86		chain.id = next(letter_iter)
87		except StopIteration:
88		raise RuntimeError('Too many chains to assign unique single-letter IDs in %s' % cif_file)
89		io = PDBIO()
90		io.set_structure(structure)
91		io.save(pdb_file)
92		_prepend_header(pdb_file)
93		outputs.append(pdb_file)
94		if verbose:
95		print(f'saved: {pdb_file} (renamed chain ids)')
96		return outputs
97
98		import string
99		letters = list(string.ascii_uppercase)
100
101		for model in structure:
102		for chain in model:
103		if has_high_rna_content(chain):
104		new_structure = Structure.Structure("new_structure")
105		new_model = Model.Model(0)
106		new_structure.add(new_model)
107
108		chain_id_new = letters.pop(0)
109		chain_id = chain.get_id()
110
111		atom_count = 0
112		for residue in chain:
113		for atom in residue:
114		atom_count += 1
115
116		remarks = [f'REMARK rna chain {chain.id} -> {chain_id_new}']
117		pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
118		if verbose:
119		print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')
120
121		chain.id = chain_id_new
122		new_model.add(chain)
123
124		io = PDBIO()
125		io.set_structure(new_structure)
126		io.save(pdb_file)
127		_prepend_header(pdb_file, remarks)
128		outputs.append(pdb_file)
129
130		return outputs
131
132	View Code Duplication	def get_parser():
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
133		parser = argparse.ArgumentParser(
134		description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
135
136		version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__)
137		version = version[1].strip()
138		parser = argparse.ArgumentParser(
139		description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
140
141		parser.add_argument('--version', help='', action='version', version=version)
142		parser.add_argument("-v", "--verbose",
143		action="store_true", help="be verbose")
144		parser.add_argument('--no-hr', help='do not insert the header into files',
145		action='store_true')
146		parser.add_argument("file", help="", default="", nargs='+')
147		return parser, version
148
149
150		if __name__ == '__main__':
151		parser, version = get_parser()
152		args = parser.parse_args()
153
154		if list != type(args.file):
155		args.file = [args.file]
156
157		for cif_file in args.file:
158		convert_cif_to_pdb(
159		cif_file,
160		add_header_to_output=not args.no_hr,
161		version=version,
162		verbose=True,
163		)
164

mmagnus / rna-tools

build.rna_tools.rna_cif2pdb.get_parser() A last analyzed 2026-01-16 23:21 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

build.rna_tools.rna_cif2pdb.get_parser() A
last analyzed 2026-01-16 23:21 UTC