| Conditions | 26 |
| Total Lines | 114 |
| Code Lines | 92 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex classes like build.rna_tools.rna_cif2pdb.convert_cif_to_pdb() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | #!/usr/bin/env python |
||
| 17 | def convert_cif_to_pdb(cif_file, add_header_to_output=True, version='', verbose=True, |
||
| 18 | split_conflicting_chains=True, output_path=None): |
||
| 19 | """Convert an mmCIF file into one or more PDB files. |
||
| 20 | |||
| 21 | Returns a list of generated PDB filenames. When a direct conversion fails |
||
| 22 | (e.g. multi-character chain ids) the chains are split into separate models |
||
| 23 | following the legacy CLI behavior. |
||
| 24 | """ |
||
| 25 | from Bio.PDB import MMCIFParser, PDBIO |
||
| 26 | |||
| 27 | def _prepend_header(pdb_file, remarks=None): |
||
| 28 | if not add_header_to_output and not remarks: |
||
| 29 | return |
||
| 30 | new_content = '' |
||
| 31 | if add_header_to_output: |
||
| 32 | new_content += add_header(version) + '\n' |
||
| 33 | if remarks: |
||
| 34 | new_content += '\n'.join(remarks) + '\n' |
||
| 35 | with open(pdb_file, 'r') as fh: |
||
| 36 | new_content += fh.read() |
||
| 37 | with open(pdb_file, 'w') as fh: |
||
| 38 | fh.write(new_content) |
||
| 39 | |||
| 40 | if output_path and split_conflicting_chains: |
||
| 41 | raise ValueError('output_path is only supported when split_conflicting_chains is False') |
||
| 42 | |||
| 43 | outputs = [] |
||
| 44 | parser = MMCIFParser() |
||
| 45 | structure = parser.get_structure("structure_id", cif_file) |
||
| 46 | pdb_file = output_path or cif_file.replace('.cif', '_fCIF.pdb') |
||
| 47 | |||
| 48 | io = PDBIO() |
||
| 49 | io.set_structure(structure) |
||
| 50 | try: |
||
| 51 | io.save(pdb_file) |
||
| 52 | _prepend_header(pdb_file) |
||
| 53 | outputs.append(pdb_file) |
||
| 54 | if verbose: |
||
| 55 | print(f'saved: {pdb_file}') |
||
| 56 | return outputs |
||
| 57 | except Exception: |
||
| 58 | if verbose: |
||
| 59 | print('Warning: some of the chains in this mmCIF file have multi-character ids.') |
||
| 60 | |||
| 61 | from Bio.PDB.MMCIFParser import MMCIFParser |
||
| 62 | from Bio.PDB import Structure, Model, PDBIO |
||
| 63 | |||
| 64 | def has_high_rna_content(chain, threshold=0.8): |
||
| 65 | rna_nucleotides = ['A', 'C', 'G', 'U', 'X'] |
||
| 66 | total_residues = 0 |
||
| 67 | rna_residues = 0 |
||
| 68 | for residue in chain: |
||
| 69 | total_residues += 1 |
||
| 70 | if residue.get_resname().strip() in rna_nucleotides: |
||
| 71 | rna_residues += 1 |
||
| 72 | if total_residues == 0: |
||
| 73 | return False |
||
| 74 | return (rna_residues / total_residues) >= threshold |
||
| 75 | |||
| 76 | parser = MMCIFParser() |
||
| 77 | structure = parser.get_structure("structure", cif_file) |
||
| 78 | |||
| 79 | if not split_conflicting_chains: |
||
| 80 | import string |
||
| 81 | letters = list(string.ascii_uppercase + string.ascii_lowercase + string.digits) |
||
| 82 | letter_iter = iter(letters) |
||
| 83 | for model in structure: |
||
| 84 | for chain in model: |
||
| 85 | try: |
||
| 86 | chain.id = next(letter_iter) |
||
| 87 | except StopIteration: |
||
| 88 | raise RuntimeError('Too many chains to assign unique single-letter IDs in %s' % cif_file) |
||
| 89 | io = PDBIO() |
||
| 90 | io.set_structure(structure) |
||
| 91 | io.save(pdb_file) |
||
| 92 | _prepend_header(pdb_file) |
||
| 93 | outputs.append(pdb_file) |
||
| 94 | if verbose: |
||
| 95 | print(f'saved: {pdb_file} (renamed chain ids)') |
||
| 96 | return outputs |
||
| 97 | |||
| 98 | import string |
||
| 99 | letters = list(string.ascii_uppercase) |
||
| 100 | |||
| 101 | for model in structure: |
||
| 102 | for chain in model: |
||
| 103 | if has_high_rna_content(chain): |
||
| 104 | new_structure = Structure.Structure("new_structure") |
||
| 105 | new_model = Model.Model(0) |
||
| 106 | new_structure.add(new_model) |
||
| 107 | |||
| 108 | chain_id_new = letters.pop(0) |
||
| 109 | chain_id = chain.get_id() |
||
| 110 | |||
| 111 | atom_count = 0 |
||
| 112 | for residue in chain: |
||
| 113 | for atom in residue: |
||
| 114 | atom_count += 1 |
||
| 115 | |||
| 116 | remarks = [f'REMARK rna chain {chain.id} -> {chain_id_new}'] |
||
| 117 | pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') |
||
| 118 | if verbose: |
||
| 119 | print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}') |
||
| 120 | |||
| 121 | chain.id = chain_id_new |
||
| 122 | new_model.add(chain) |
||
| 123 | |||
| 124 | io = PDBIO() |
||
| 125 | io.set_structure(new_structure) |
||
| 126 | io.save(pdb_file) |
||
| 127 | _prepend_header(pdb_file, remarks) |
||
| 128 | outputs.append(pdb_file) |
||
| 129 | |||
| 130 | return outputs |
||
| 131 | |||
| 164 |