| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                #!/usr/bin/env python  | 
            
            
                                                        
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                # -*- coding: utf-8 -*-  | 
            
            
                                                        
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                from rna_tools.tools.pdb_formatix.SingleLineUtils import get_res_code, get_res_num, get_atom_code, \  | 
            
            
                                                        
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                    set_atom_code, set_line_bfactor  | 
            
            
                                                        
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                import re  | 
            
            
                                                        
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                class PDBFile(object):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                    """Class for holding data from a PDB file and modifying it.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                    # find 'ATOM' lines in a PDB file  | 
            
            
                                                        
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                    ATOM_LINE_PATTERN = re.compile('^ATOM') | 
            
            
                                                        
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                    # dictionary for changing residue names from 3 letters to 1 letter  | 
            
            
                                                        
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                    RES3TO1 = { | 
            
            
                                                        
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                        'GUA': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                        'URI': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    19
                 | 
                                    
                                                     | 
                
                 | 
                        'CYT': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                        'ADE': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                        'RG': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                        'RU': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                        'RC': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                        'RA': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                        'R3G': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                        'R3U': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                        'R3C': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                        'R3A': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    31
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                        'R5G': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    33
                 | 
                                    
                                                     | 
                
                 | 
                        'R5U': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                        'R5C': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    35
                 | 
                                    
                                                     | 
                
                 | 
                        'R5A': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    37
                 | 
                                    
                                                     | 
                
                 | 
                        'RG3': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    38
                 | 
                                    
                                                     | 
                
                 | 
                        'RU3': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                        'RC3': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                        'RA3': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                        'RG5': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    43
                 | 
                                    
                                                     | 
                
                 | 
                        'RU5': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                        'RC5': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                        'RA5': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                        'RGU': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    48
                 | 
                                    
                                                     | 
                
                 | 
                        'URA': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                        'RCY': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    50
                 | 
                                    
                                                     | 
                
                 | 
                        'RAD': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    51
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    52
                 | 
                                    
                                                     | 
                
                 | 
                        # just in case  | 
            
            
                                                        
            
                                    
            
            
                | 
                    53
                 | 
                                    
                                                     | 
                
                 | 
                        'G': 'G',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    54
                 | 
                                    
                                                     | 
                
                 | 
                        'U': 'U',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    55
                 | 
                                    
                                                     | 
                
                 | 
                        'C': 'C',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    56
                 | 
                                    
                                                     | 
                
                 | 
                        'A': 'A',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    57
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                        
            
                                    
            
            
                | 
                    58
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    59
                 | 
                                    
                                                     | 
                
                 | 
                    AMINOACID_CODES = ("Ala", "Arg", "Asn", "Asp", "Cys", "Glu", "Gln", "Gly", | 
            
            
                                                        
            
                                    
            
            
                | 
                    60
                 | 
                                    
                                                     | 
                
                 | 
                                       "His", "Ile", "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr",  | 
            
            
                                                        
            
                                    
            
            
                | 
                    61
                 | 
                                    
                                                     | 
                
                 | 
                                       "Trp", "Tyr", "Val",  | 
            
            
                                                        
            
                                    
            
            
                | 
                    62
                 | 
                                    
                                                     | 
                
                 | 
                                       "ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY",  | 
            
            
                                                        
            
                                    
            
            
                | 
                    63
                 | 
                                    
                                                     | 
                
                 | 
                                       "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR",  | 
            
            
                                                        
            
                                    
            
            
                | 
                    64
                 | 
                                    
                                                     | 
                
                 | 
                                       "TRP", "TYR", "VAL"  | 
            
            
                                                        
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                                       )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                    def __init__(self, pdb_string=None, pdb_path=None, pdb_handle=None, verbose=False):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                        """Constructor, should get exactly one of the arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    69
                 | 
                                    
                                                     | 
                
                 | 
                             * pdb_string = PDB as a string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    70
                 | 
                                    
                                                     | 
                
                 | 
                             * pdb_path = string with path to a PDB file  | 
            
            
                                                        
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                             * pdb_handle = handle to a PDB file  | 
            
            
                                                        
            
                                    
            
            
                | 
                    72
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                        self.verbose = verbose  | 
            
            
                                                        
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                        if len([x for x in [pdb_string, pdb_path, pdb_handle] if x is not None]) > 1:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                            print('You should provide at most one source for PDB file') | 
            
            
                                                        
            
                                    
            
            
                | 
                    77
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception  | 
            
            
                                                        
            
                                    
            
            
                | 
                    78
                 | 
                                    
                                                     | 
                
                 | 
                        input_string = ''  | 
            
            
                                                        
            
                                    
            
            
                | 
                    79
                 | 
                                    
                                                     | 
                
                 | 
                        if pdb_string is not None:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    80
                 | 
                                    
                                                     | 
                
                 | 
                            input_string = pdb_string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    81
                 | 
                                    
                                                     | 
                
                 | 
                        elif pdb_path is not None:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    82
                 | 
                                    
                                                     | 
                
                 | 
                            with open(pdb_path) as f:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    83
                 | 
                                    
                                                     | 
                
                 | 
                                input_string = f.read()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    84
                 | 
                                    
                                                     | 
                
                 | 
                        elif pdb_handle is not None:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    85
                 | 
                                    
                                                     | 
                
                 | 
                            input_string = pdb_handle.read()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    86
                 | 
                                    
                                                     | 
                
                 | 
                        self.pdb_string = input_string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    87
                 | 
                                    
                                                     | 
                
                 | 
                        self.pdb_lines = self.pdb_string.split('\n') | 
            
            
                                                        
            
                                    
            
            
                | 
                    88
                 | 
                                    
                                                     | 
                
                 | 
                        self.fixes = []  | 
            
            
                                                        
            
                                    
            
            
                | 
                    89
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    90
                 | 
                                    
                                                     | 
                
                 | 
                    def save(self, file_path):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    91
                 | 
                                    
                                                     | 
                
                 | 
                        """Save current PDB to disk  | 
            
            
                                                        
            
                                    
            
            
                | 
                    92
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    93
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    94
                 | 
                                    
                                                     | 
                
                 | 
                          * file_path = path where it will be saved  | 
            
            
                                                        
            
                                    
            
            
                | 
                    95
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    96
                 | 
                                    
                                                     | 
                
                 | 
                        with open(file_path, 'w') as f:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    97
                 | 
                                    
                                                     | 
                
                 | 
                            f.write(self.pdb_string)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    98
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    99
                 | 
                                    
                                                     | 
                
                 | 
                    def _apply_fix(self, fix_name, result_string, result_lines=None):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    100
                 | 
                                    
                                                     | 
                
                 | 
                        """Helper function for applying fixes and saving information about them.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    101
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    102
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    103
                 | 
                                    
                                                     | 
                
                 | 
                          * fix_name = string that will be added to self.pdb_fixes, unless  | 
            
            
                                                        
            
                                    
            
            
                | 
                    104
                 | 
                                    
                                                     | 
                
                 | 
                            result_string is the same as pdb_string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    105
                 | 
                                    
                                                     | 
                
                 | 
                          * result_string = string after applying this fix  | 
            
            
                                                        
            
                                    
            
            
                | 
                    106
                 | 
                                    
                                                     | 
                
                 | 
                          * result_lines = optional, list of lines, use this argument if you  | 
            
            
                                                        
            
                                    
            
            
                | 
                    107
                 | 
                                    
                                                     | 
                
                 | 
                            already have such list and don't want to lose time on splitting  | 
            
            
                                                        
            
                                    
            
            
                | 
                    108
                 | 
                                    
                                                     | 
                
                 | 
                            result_string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    109
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    110
                 | 
                                    
                                                     | 
                
                 | 
                        if self.pdb_string != result_string:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    111
                 | 
                                    
                                                     | 
                
                 | 
                            self.fixes.append(fix_name)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    112
                 | 
                                    
                                                     | 
                
                 | 
                            self.pdb_string = result_string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    113
                 | 
                                    
                                                     | 
                
                 | 
                            if result_lines is not None:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    114
                 | 
                                    
                                                     | 
                
                 | 
                                self.pdb_lines = result_lines  | 
            
            
                                                        
            
                                    
            
            
                | 
                    115
                 | 
                                    
                                                     | 
                
                 | 
                            else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    116
                 | 
                                    
                                                     | 
                
                 | 
                                self.pdb_lines = self.pdb_string.split('\n') | 
            
            
                                                        
            
                                    
            
            
                | 
                    117
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    118
                 | 
                                    
                                                     | 
                
                 | 
                    def set_string(self, pdb_string):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    119
                 | 
                                    
                                                     | 
                
                 | 
                        """Change PDB string stored in this instance of the class  | 
            
            
                                                        
            
                                    
            
            
                | 
                    120
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    121
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    122
                 | 
                                    
                                                     | 
                
                 | 
                          * pdb_string = new PDB string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    123
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    124
                 | 
                                    
                                                     | 
                
                 | 
                        self.pdb_string = pdb_string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    125
                 | 
                                    
                                                     | 
                
                 | 
                        self.pdb_lines = pdb_string.split('\n') | 
            
            
                                                        
            
                                    
            
            
                | 
                    126
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    127
                 | 
                                    
                                                     | 
                
                 | 
                    def _get_atom_lines(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    128
                 | 
                                    
                                                     | 
                
                 | 
                        """Get only lines with ATOM information  | 
            
            
                                                        
            
                                    
            
            
                | 
                    129
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    130
                 | 
                                    
                                                     | 
                
                 | 
                        return [l for l in self.pdb_lines if re.match(self.ATOM_LINE_PATTERN, l)]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    131
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    132
                 | 
                                    
                                                     | 
                
                 | 
                    def validate_pdb(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    133
                 | 
                                    
                                                     | 
                
                 | 
                        """Check if file is a PDB structure  | 
            
            
                                                        
            
                                    
            
            
                | 
                    134
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    135
                 | 
                                    
                                                     | 
                
                 | 
                        Output:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    136
                 | 
                                    
                                                     | 
                
                 | 
                          * True if it is a PDB, False otherwise  | 
            
            
                                                        
            
                                    
            
            
                | 
                    137
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    138
                 | 
                                    
                                                     | 
                
                 | 
                        atom_lines = self._get_atom_lines()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    139
                 | 
                                    
                                                     | 
                
                 | 
                        if not len(atom_lines):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    140
                 | 
                                    
                                                     | 
                
                 | 
                            return False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    141
                 | 
                                    
                                                     | 
                
                 | 
                        return True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    142
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    143
                 | 
                                    
                                                     | 
                
                 | 
                    def detect_proteins(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    144
                 | 
                                    
                                                     | 
                
                 | 
                        """Check if there are any aminoacid fragments in the file  | 
            
            
                                                        
            
                                    
            
            
                | 
                    145
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    146
                 | 
                                    
                                                     | 
                
                 | 
                        Output:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    147
                 | 
                                    
                                                     | 
                
                 | 
                          * True if there are some, False otherwise  | 
            
            
                                                        
            
                                    
            
            
                | 
                    148
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    149
                 | 
                                    
                                                     | 
                
                 | 
                        for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    150
                 | 
                                    
                                                     | 
                
                 | 
                            if get_res_code(l) in self.AMINOACID_CODES:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    151
                 | 
                                    
                                                     | 
                
                 | 
                                return True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    152
                 | 
                                    
                                                     | 
                
                 | 
                        return False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    153
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    154
                 | 
                                    
                                                     | 
                
                 | 
                    def seq_from_pdb(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    155
                 | 
                                    
                                                     | 
                
                 | 
                        """Extract sequence from a PDB and return it as a string.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    156
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    157
                 | 
                                    
                                                     | 
                
                 | 
                        Output:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    158
                 | 
                                    
                                                     | 
                
                 | 
                          * sequence, returned as a string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    159
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    160
                 | 
                                    
                                                     | 
                
                 | 
                        atoms = [l.split() for l in self._get_atom_lines()]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    161
                 | 
                                    
                                                     | 
                
                 | 
                        if atoms[0][3][0] != 'r':  # it is 'r' if it's a ROSETTA PDB  | 
            
            
                                                        
            
                                    
            
            
                | 
                    162
                 | 
                                    
                                                     | 
                
                 | 
                            seq = [self.RES3TO1[atoms[0][3]]]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    163
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    164
                 | 
                                    
                                                     | 
                
                 | 
                            seq = [atoms[0][3][1]]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    165
                 | 
                                    
                                                     | 
                
                 | 
                        for a in range(1, len(atoms)):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    166
                 | 
                                    
                                                     | 
                
                 | 
                            # atom number is different than previous one  | 
            
            
                                                        
            
                                    
            
            
                | 
                    167
                 | 
                                    
                                                     | 
                
                 | 
                            if atoms[a][5] != atoms[a - 1][5]:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    168
                 | 
                                    
                                                     | 
                
                 | 
                                if atoms[a][3][0] != 'r':  # check for ROSETTA PDB  | 
            
            
                                                        
            
                                    
            
            
                | 
                    169
                 | 
                                    
                                                     | 
                
                 | 
                                    seq.append(self.RES3TO1[atoms[a][3]])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    170
                 | 
                                    
                                                     | 
                
                 | 
                                else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    171
                 | 
                                    
                                                     | 
                
                 | 
                                    seq.append(atoms[a][3][1])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    172
                 | 
                                    
                                                     | 
                
                 | 
                        return ''.join(seq)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    173
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    174
                 | 
                                    
                                                     | 
                
                 | 
                    def seq_from_amber_like_pdb(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    175
                 | 
                                    
                                                     | 
                
                 | 
                        """Extract sequence from a PDB and return it as a string - use it for amber-like files.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    176
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    177
                 | 
                                    
                                                     | 
                
                 | 
                        Output:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    178
                 | 
                                    
                                                     | 
                
                 | 
                          * sequence, returned as a string, such as: RG5 RC RU RG RG RG RC RG RC RA RG RG3 RC5 RC RU RG RA RC RG RG RU RA RC RA RG RC3  | 
            
            
                                                        
            
                                    
            
            
                | 
                    179
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    180
                 | 
                                    
                                                     | 
                
                 | 
                        atoms = [l.split() for l in self._get_atom_lines()]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    181
                 | 
                                    
                                                     | 
                
                 | 
                        seq = []  | 
            
            
                                                        
            
                                    
            
            
                | 
                    182
                 | 
                                    
                                                     | 
                
                 | 
                        seq.append(atoms[0][3])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    183
                 | 
                                    
                                                     | 
                
                 | 
                        for a in range(1, len(atoms)):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    184
                 | 
                                    
                                                     | 
                
                 | 
                            # atom number is different than previous one  | 
            
            
                                                        
            
                                    
            
            
                | 
                    185
                 | 
                                    
                                                     | 
                
                 | 
                            if self.verbose:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    186
                 | 
                                    
                                                     | 
                
                 | 
                                print((atoms[a][5], atoms[a - 1][5]))  | 
            
            
                                                        
            
                                    
            
            
                | 
                    187
                 | 
                                    
                                                     | 
                
                 | 
                            if atoms[a][5] != atoms[a - 1][5]:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    188
                 | 
                                    
                                                     | 
                
                 | 
                                seq.append(atoms[a][3])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    189
                 | 
                                    
                                                     | 
                
                 | 
                        return ' '.join(seq)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    190
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    191
                 | 
                                    
                                                     | 
                
                 | 
                    def get_fasta(self, name='seq', lowercase=False):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    192
                 | 
                                    
                                                     | 
                
                 | 
                        """Format sequence in FASTA format, with a header and lines split  | 
            
            
                                                        
            
                                    
            
            
                | 
                    193
                 | 
                                    
                                                     | 
                
                 | 
                        at 80 characters.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    194
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    195
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    196
                 | 
                                    
                                                     | 
                
                 | 
                          * name = name of the sequence (it's put in the header line)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    197
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    198
                 | 
                                    
                                                     | 
                
                 | 
                        Output:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    199
                 | 
                                    
                                                     | 
                
                 | 
                          * FASTA returned as a string  | 
            
            
                                                        
            
                                    
            
            
                | 
                    200
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    201
                 | 
                                    
                                                     | 
                
                 | 
                        seq = self.seq_from_pdb()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    202
                 | 
                                    
                                                     | 
                
                 | 
                        if lowercase:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    203
                 | 
                                    
                                                     | 
                
                 | 
                            seq = seq.lower()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    204
                 | 
                                    
                                                     | 
                
                 | 
                        result = ['>' + name]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    205
                 | 
                                    
                                                     | 
                
                 | 
                        result.extend([seq[i:i + 80] for i in range(0, len(seq), 80)])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    206
                 | 
                                    
                                                     | 
                
                 | 
                        result.append('') | 
            
            
                                                        
            
                                    
            
            
                | 
                    207
                 | 
                                    
                                                     | 
                
                 | 
                        return '\n'.join(result)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    208
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    209
                 | 
                                    
                                                     | 
                
                 | 
                    def remove_non_atoms(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    210
                 | 
                                    
                                                     | 
                
                 | 
                        """Remove all lines that are not ATOMs  | 
            
            
                                                        
            
                                    
            
            
                | 
                    211
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    212
                 | 
                                    
                                                     | 
                
                 | 
                        result = self._get_atom_lines()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    213
                 | 
                                    
                                                     | 
                
                 | 
                        self._apply_fix('Removed non-atom lines', '\n'.join(result), result) | 
            
            
                                                        
            
                                    
            
            
                | 
                    214
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    215
                 | 
                                    
                                                     | 
                
                 | 
                    def _check_resname_3(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    216
                 | 
                                    
                                                     | 
                
                 | 
                        """Check if PDB uses 3 or 1 letter residue names.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    217
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    218
                 | 
                                    
                                                     | 
                
                 | 
                        Output:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    219
                 | 
                                    
                                                     | 
                
                 | 
                          * bool, True if 3 letters, False otherwise  | 
            
            
                                                        
            
                                    
            
            
                | 
                    220
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    221
                 | 
                                    
                                                     | 
                
                 | 
                        for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    222
                 | 
                                    
                                                     | 
                
                 | 
                            if l.startswith('ATOM'): | 
            
            
                                                        
            
                                    
            
            
                | 
                    223
                 | 
                                    
                                                     | 
                
                 | 
                                return 3 == len(l.split()[3])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    224
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    225
                 | 
                                    
                                                     | 
                
                 | 
                    def _resname_3to1(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    226
                 | 
                                    
                                                     | 
                
                 | 
                        """Convert residue names in PDB file from 3 letters to 1 letter.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    227
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    228
                 | 
                                    
                                                     | 
                
                 | 
                        result = []  | 
            
            
                                                        
            
                                    
            
            
                | 
                    229
                 | 
                                    
                                                     | 
                
                 | 
                        for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    230
                 | 
                                    
                                                     | 
                
                 | 
                            if l.startswith('ATOM'): | 
            
            
                                                        
            
                                    
            
            
                | 
                    231
                 | 
                                    
                                                     | 
                
                 | 
                                long_name = l.split()[3]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    232
                 | 
                                    
                                                     | 
                
                 | 
                                try:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    233
                 | 
                                    
                                                     | 
                
                 | 
                                    short_name = self.RES3TO1[long_name]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    234
                 | 
                                    
                                                     | 
                
                 | 
                                except KeyError:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    235
                 | 
                                    
                                                     | 
                
                 | 
                                    short_name = 'X'  | 
            
            
                                                        
            
                                    
            
            
                | 
                    236
                 | 
                                    
                                                     | 
                
                 | 
                                result.append(l[:17] + '  ' + short_name + l[20:])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    237
                 | 
                                    
                                                     | 
                
                 | 
                            else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    238
                 | 
                                    
                                                     | 
                
                 | 
                                result.append(l)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    239
                 | 
                                    
                                                     | 
                
                 | 
                        self._apply_fix('resname_3to1', '\n'.join(result), result) | 
            
            
                                                        
            
                                    
            
            
                | 
                    240
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    241
                 | 
                                    
                                                     | 
                
                 | 
                    def resname_check_and_3to1(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    242
                 | 
                                    
                                                     | 
                
                 | 
                        """Check if resnames are 3 letter long and if so convert them to 1 letter.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    243
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    244
                 | 
                                    
                                                     | 
                
                 | 
                        if self._check_resname_3():  | 
            
            
                                                        
            
                                    
            
            
                | 
                    245
                 | 
                                    
                                                     | 
                
                 | 
                            try:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    246
                 | 
                                    
                                                     | 
                
                 | 
                                self._resname_3to1()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    247
                 | 
                                    
                                                     | 
                
                 | 
                            except:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    248
                 | 
                                    
                                                     | 
                
                 | 
                                print('Conversion to 1-letter residue names failed') | 
            
            
                                                        
            
                                    
            
            
                | 
                    249
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    250
                 | 
                                    
                                                     | 
                
                 | 
                    def terminate_chains(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    251
                 | 
                                    
                                                     | 
                
                 | 
                        """Add 'TER' at the end of chain if none 'TER's are found in PDB.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    252
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    253
                 | 
                                    
                                                     | 
                
                 | 
                        for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    254
                 | 
                                    
                                                     | 
                
                 | 
                            if l.startswith('TER'): | 
            
            
                                                        
            
                                    
            
            
                | 
                    255
                 | 
                                    
                                                     | 
                
                 | 
                                return  | 
            
            
                                                        
            
                                    
            
            
                | 
                    256
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    257
                 | 
                                    
                                                     | 
                
                 | 
                            result = []  | 
            
            
                                                        
            
                                    
            
            
                | 
                    258
                 | 
                                    
                                                     | 
                
                 | 
                            chain_started = False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    259
                 | 
                                    
                                                     | 
                
                 | 
                            ter_added = False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    260
                 | 
                                    
                                                     | 
                
                 | 
                            for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    261
                 | 
                                    
                                                     | 
                
                 | 
                                if 'ATOM' in l:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    262
                 | 
                                    
                                                     | 
                
                 | 
                                    chain_started = True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    263
                 | 
                                    
                                                     | 
                
                 | 
                                else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    264
                 | 
                                    
                                                     | 
                
                 | 
                                    if chain_started:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    265
                 | 
                                    
                                                     | 
                
                 | 
                                        result.append('TER') | 
            
            
                                                        
            
                                    
            
            
                | 
                    266
                 | 
                                    
                                                     | 
                
                 | 
                                        ter_added = True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    267
                 | 
                                    
                                                     | 
                
                 | 
                                        chain_started = False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    268
                 | 
                                    
                                                     | 
                
                 | 
                                result.append(l)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    269
                 | 
                                    
                                                     | 
                
                 | 
                        if not ter_added:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    270
                 | 
                                    
                                                     | 
                
                 | 
                            result.append('TER') | 
            
            
                                                        
            
                                    
            
            
                | 
                    271
                 | 
                                    
                                                     | 
                
                 | 
                        self._apply_fix('terminate_chains', '\n'.join(result), result) | 
            
            
                                                        
            
                                    
            
            
                | 
                    272
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    273
                 | 
                                    
                                                     | 
                
                 | 
                    def _split_by_ters(self, separator='TER\n'):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    274
                 | 
                                    
                                                     | 
                
                 | 
                        """Split a PDB string by TER lines or other separator  | 
            
            
                                                        
            
                                    
            
            
                | 
                    275
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    276
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    277
                 | 
                                    
                                                     | 
                
                 | 
                          * separator = optional, separator dividing structures, default is 'TER\n'  | 
            
            
                                                        
            
                                    
            
            
                | 
                    278
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    279
                 | 
                                    
                                                     | 
                
                 | 
                        return [i + separator for i in self.pdb_string.split(separator)]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    280
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    281
                 | 
                                    
                                                     | 
                
                 | 
                    def remove_short_chains(self, threshold=9):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    282
                 | 
                                    
                                                     | 
                
                 | 
                        """Remove chains that have chains with a small number of atoms.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    283
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    284
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    285
                 | 
                                    
                                                     | 
                
                 | 
                          * threshold = if chain has more atoms, it stays in the result  | 
            
            
                                                        
            
                                    
            
            
                | 
                    286
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    287
                 | 
                                    
                                                     | 
                
                 | 
                        chains = self.pdb_string.split('TER\n') | 
            
            
                                                        
            
                                    
            
            
                | 
                    288
                 | 
                                    
                                                     | 
                
                 | 
                        good_chains = [c for c in chains if len(c.split('\nATOM')) > threshold] | 
            
            
                                                        
            
                                    
            
            
                | 
                    289
                 | 
                                    
                                                     | 
                
                 | 
                        self._apply_fix('remove_short_chains', '\n'.join(good_chains)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    290
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    291
                 | 
                                    
                                                     | 
                
                 | 
                    def check_and_add_P_at_start(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    292
                 | 
                                    
                                                     | 
                
                 | 
                        residues = {} | 
            
            
                                                        
            
                                    
            
            
                | 
                    293
                 | 
                                    
                                                     | 
                
                 | 
                        for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    294
                 | 
                                    
                                                     | 
                
                 | 
                            res_num = get_res_num(l)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    295
                 | 
                                    
                                                     | 
                
                 | 
                            if res_num in residues:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    296
                 | 
                                    
                                                     | 
                
                 | 
                                residues[res_num].append(l)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    297
                 | 
                                    
                                                     | 
                
                 | 
                            else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    298
                 | 
                                    
                                                     | 
                
                 | 
                                residues[res_num] = [l]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    299
                 | 
                                    
                                                     | 
                
                 | 
                        first_residue = residues[min(residues.keys())]  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    300
                 | 
                                    
                                                     | 
                
                 | 
                        # check P  | 
            
            
                                                        
            
                                    
            
            
                | 
                    301
                 | 
                                    
                                                     | 
                
                 | 
                        has_p = False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    302
                 | 
                                    
                                                     | 
                
                 | 
                        for l in first_residue:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    303
                 | 
                                    
                                                     | 
                
                 | 
                            if get_atom_code(l) == 'P':  | 
            
            
                                                        
            
                                    
            
            
                | 
                    304
                 | 
                                    
                                                     | 
                
                 | 
                                has_p = True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    305
                 | 
                                    
                                                     | 
                
                 | 
                        if not has_p:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    306
                 | 
                                    
                                                     | 
                
                 | 
                            # add P  | 
            
            
                                                        
            
                                    
            
            
                | 
                    307
                 | 
                                    
                                                     | 
                
                 | 
                            corrected_residue = []  | 
            
            
                                                        
            
                                    
            
            
                | 
                    308
                 | 
                                    
                                                     | 
                
                 | 
                            for l in first_residue:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    309
                 | 
                                    
                                                     | 
                
                 | 
                                if get_atom_code(l) == 'O5\'' or get_atom_code(l) == 'O5*':  | 
            
            
                                                        
            
                                    
            
            
                | 
                    310
                 | 
                                    
                                                     | 
                
                 | 
                                    corrected_residue.append(set_atom_code(l, 'P'))  | 
            
            
                                                        
            
                                    
            
            
                | 
                    311
                 | 
                                    
                                                     | 
                
                 | 
                                else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    312
                 | 
                                    
                                                     | 
                
                 | 
                                    corrected_residue.append(l)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    313
                 | 
                                    
                                                     | 
                
                 | 
                            residues[min(residues.keys())] = corrected_residue  | 
            
            
                                                        
            
                                    
            
            
                | 
                    314
                 | 
                                    
                                                     | 
                
                 | 
                            residues = ['\n'.join(residues[r]) for r in residues]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    315
                 | 
                                    
                                                     | 
                
                 | 
                            self._apply_fix('add_P_at_start', '\n'.join(residues)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    316
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    317
                 | 
                                    
                                                     | 
                
                 | 
                    def set_residues_bfactor(self, bfactors):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    318
                 | 
                                    
                                                     | 
                
                 | 
                        """Set B-factor to any value you want  | 
            
            
                                                        
            
                                    
            
            
                | 
                    319
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    320
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    321
                 | 
                                    
                                                     | 
                
                 | 
                          * bfactors = list of B-factors that will be set, should be of same  | 
            
            
                                                        
            
                                    
            
            
                | 
                    322
                 | 
                                    
                                                     | 
                
                 | 
                            length as nucleotide sequence  | 
            
            
                                                        
            
                                    
            
            
                | 
                    323
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    324
                 | 
                                    
                                                     | 
                
                 | 
                        ans = []  | 
            
            
                                                        
            
                                    
            
            
                | 
                    325
                 | 
                                    
                                                     | 
                
                 | 
                        for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    326
                 | 
                                    
                                                     | 
                
                 | 
                            if l.startswith('ATOM'): | 
            
            
                                                        
            
                                    
            
            
                | 
                    327
                 | 
                                    
                                                     | 
                
                 | 
                                res_num = get_res_num(l)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    328
                 | 
                                    
                                                     | 
                
                 | 
                                try:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    329
                 | 
                                    
                                                     | 
                
                 | 
                                    ans.append(set_line_bfactor(l, bfactors[res_num - 1]))  | 
            
            
                                                        
            
                                    
            
            
                | 
                    330
                 | 
                                    
                                                     | 
                
                 | 
                                except IndexError:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    331
                 | 
                                    
                                                     | 
                
                 | 
                                    pass  | 
            
            
                                                        
            
                                    
            
            
                | 
                    332
                 | 
                                    
                                                     | 
                
                 | 
                            else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    333
                 | 
                                    
                                                     | 
                
                 | 
                                ans.append(l)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    334
                 | 
                                    
                                                     | 
                
                 | 
                        self.pdb_lines = ans  | 
            
            
                                                        
            
                                    
            
            
                | 
                    335
                 | 
                                    
                                                     | 
                
                 | 
                        self.pdb_string = '\n'.join(ans)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    336
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    337
                 | 
                                    
                                                     | 
                
                 | 
                    def pedantic_pdb(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    338
                 | 
                                    
                                                     | 
                
                 | 
                        """Do everything that's possible to fix PDB: 3-to-1, no HETATMs, TERs etc.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    339
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    340
                 | 
                                    
                                                     | 
                
                 | 
                        self.resname_check_and_3to1()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    341
                 | 
                                    
                                                     | 
                
                 | 
                        self.terminate_chains()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    342
                 | 
                                    
                                                     | 
                
                 | 
                        self.remove_short_chains()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    343
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    344
                 | 
                                    
                                                     | 
                
                 | 
                    def count_models(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    345
                 | 
                                    
                                                     | 
                
                 | 
                        """Count models in the PDB file  | 
            
            
                                                        
            
                                    
            
            
                | 
                    346
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    347
                 | 
                                    
                                                     | 
                
                 | 
                        Output:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    348
                 | 
                                    
                                                     | 
                
                 | 
                          * number of models as an int  | 
            
            
                                                        
            
                                    
            
            
                | 
                    349
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    350
                 | 
                                    
                                                     | 
                
                 | 
                        model_num = 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    351
                 | 
                                    
                                                     | 
                
                 | 
                        for l in self.pdb_lines:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    352
                 | 
                                    
                                                     | 
                
                 | 
                            if l.startswith('MODEL'): | 
            
            
                                                        
            
                                    
            
            
                | 
                    353
                 | 
                                    
                                                     | 
                
                 | 
                                model_num += 1  | 
            
            
                                                        
            
                                    
            
            
                | 
                    354
                 | 
                                    
                                                     | 
                
                 | 
                        return model_num  | 
            
            
                                                        
            
                                    
            
            
                | 
                    355
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    356
                 | 
                                    
                                                     | 
                
                 | 
                    def get_model(self, model_num):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    357
                 | 
                                    
                                                     | 
                
                 | 
                        """Get n-th model from the file  | 
            
            
                                                        
            
                                    
            
            
                | 
                    358
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    359
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    360
                 | 
                                    
                                                     | 
                
                 | 
                          * model_num = number of model to get, starts with 0  | 
            
            
                                                        
            
                                    
            
            
                | 
                    361
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    362
                 | 
                                    
                                                     | 
                
                 | 
                        model_borders = list(zip(  | 
            
            
                                                        
            
                                    
            
            
                | 
                    363
                 | 
                                    
                                                     | 
                
                 | 
                            [i[0] for i in enumerate(self.pdb_lines) if i[1].startswith('MODEL')], | 
            
            
                                                        
            
                                    
            
            
                | 
                    364
                 | 
                                    
                                                     | 
                
                 | 
                            [i[0] for i in enumerate(self.pdb_lines) if i[1].startswith('ENDMDL')] | 
            
            
                                                        
            
                                    
            
            
                | 
                    365
                 | 
                                    
                                                     | 
                
                 | 
                        ))  | 
            
            
                                                        
            
                                    
            
            
                | 
                    366
                 | 
                                    
                                                     | 
                
                 | 
                        result = self.pdb_lines[:model_borders[0][0]]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    367
                 | 
                                    
                                                     | 
                
                 | 
                        result.extend(self.pdb_lines[model_borders[model_num][0]:model_borders[model_num][1] + 1])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    368
                 | 
                                    
                                                     | 
                
                 | 
                        result.extend(self.pdb_lines[model_borders[-1][1] + 1:])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    369
                 | 
                                    
                                                     | 
                
                 | 
                        self._apply_fix('get_model', '\n'.join(result), result) | 
            
            
                                                        
            
                                    
            
            
                | 
                    370
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    371
                 | 
                                    
                                                     | 
                
                 | 
                    def check_and_get_first_model(self):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    372
                 | 
                                    
                                                     | 
                
                 | 
                        """Check if there are more than one models and get only the first one  | 
            
            
                                                        
            
                                    
            
            
                | 
                    373
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    374
                 | 
                                    
                                                     | 
                
                 | 
                        if self.count_models() >= 2:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    375
                 | 
                                    
                                                     | 
                
                 | 
                            self.get_model(0)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    376
                 | 
                                    
                                                     | 
                
                 | 
                 |