Passed
Push — master ( 3924a5...7a6481 )
by Marcin
03:31
created

build.rna_tools.rna_pdb_seq   A

Complexity

Total Complexity 1

Size/Duplication

Total Lines 134
Duplicated Lines 38.06 %

Importance

Changes 0
Metric Value
eloc 79
dl 51
loc 134
rs 10
c 0
b 0
f 0
wmc 1

1 Function

Rating   Name   Duplication   Size   Complexity  
A get_parser() 51 51 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""
4
5
"""
6
from __future__ import print_function
7
import argparse
8
from icecream import ic
9
import textwrap
10
import sys
11
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
12
ic.configureOutput(prefix='> ')
13
import os
14
15
from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version, \
16
                          collapsed_view, fetch, fetch_ba, fetch_cif, replace_chain, RNAStructure, \
17
                          select_pdb_fragment, sort_strings, set_chain_for_struc
18
from rna_tools.tools.rna_x3dna.rna_x3dna import x3DNA
19
20
21
22 View Code Duplication
def get_parser():
    """Build the command-line parser for this script.

    The parser description is taken from the module docstring.  Help texts
    that carry multi-line examples are shown verbatim thanks to
    ``RawTextHelpFormatter`` (a subclass of the previously used
    ``RawDescriptionHelpFormatter``, so the description is still raw).

    Returns:
        argparse.ArgumentParser: parser exposing the flags ``--save-to-file``,
        ``--compact``, ``--color-seq``, ``--chain-first``, ``--fasta``,
        ``--oneline``, ``--renum-atoms``, ``-v/--verbose``, the option
        ``--uniq`` and the positional ``file``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)

    # The script writes '# <title>' plus the sequence output next to the
    # input, with a .txt extension.
    parser.add_argument('--save-to-file',
                        help='also save the output (prefixed with the PDB title) to <pdb>.txt',
                        action='store_true')

    parser.add_argument('--compact',
                        help=textwrap.dedent("""with --get-seq, get it in compact view
$ rna_pdb_tools.py --get-seq --compact *.pdb
# 20_Bujnicki_1
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
# 20_Bujnicki_2
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
# 20_Bujnicki_3
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU # A:1-68
# 20_Bujnicki_4

"""), action='store_true')

    parser.add_argument('--color-seq', help='color seq, works with --get-seq', action='store_true')

    # Forwarded to RNAStructure.get_seq() as ``chainfirst``.
    parser.add_argument('--chain-first',
                        help='print the chain/residue range before the sequence '
                             '(see the --uniq example)',
                        action='store_true')

    parser.add_argument('--fasta',
                        help=textwrap.dedent("""with --get-seq, show sequences in fasta format,
can be combined with --compact (mind, chains will be separated with ' ' in one line)

$ rna_pdb_tools.py --get-seq --fasta --compact input/20_Bujnicki_1.pdb
> 20_Bujnicki_1
ACCCGCAAGGCCGACGGC GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU

"""), action='store_true')

    parser.add_argument('--oneline',
                        help="print the sequence and the file name in one line: '<seq> # <name>'",
                        action='store_true')

    # The value is appended to the file name as a Python subscript, e.g.
    # '[:5]' keeps only one file per distinct 5-character name prefix.
    parser.add_argument('--uniq', help=textwrap.dedent("""
rna_pdb_tools.py --get-seq --uniq '[:5]' --compact --chain-first * | sort
A:1-121        ACCUUGCGCAACUGGCGAAUCCUGGGGCUGCCGCCGGCAGUACCC...CA # rp13nc3295_min.out.1
A:1-123        ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # rp13cp0016_min.out.1
A:1-123        ACCUUGCGCGACUGGCGAAUCCUGAAGCUGCUUUGAGCGGCUUCG...AG # zcp_6537608a_ALL-000001_AA
A:1-45 57-71   GGGUCGUGACUGGCGAACAGGUGGGAAACCACCGGGGAGCGACCCGCCGCCCGCCUGGGC # solution
"""))

    parser.add_argument('--renum-atoms', help='renumber atoms, tested with --get-seq',
                        action='store_true')
    parser.add_argument("-v", "--verbose",
                        action="store_true", help="be verbose")
    # Single positional for now; the caller wraps it into a list itself.
    parser.add_argument("file", help="input PDB file", default="")  # nargs='+')
    return parser
73
74
75
if __name__ == '__main__':
    # Script entry point: for every input file, clean the structure, print
    # its sequence in the requested layout and optionally save it to disk.
    parser = get_parser()
    args = parser.parse_args()

    # The parser currently yields a single string for ``file``; wrap it so
    # the loop below also works if the positional is switched to nargs='+'.
    if not isinstance(args.file, list):
        args.file = [args.file]

    analyzed = []  # --uniq keys already seen
    for f in args.file:
        if args.uniq:
            # SECURITY NOTE(review): --uniq is evaluated as Python source
            # appended to the file name (e.g. f[:5]).  This executes arbitrary
            # code from the command line; never feed it untrusted input.
            # Consider parsing the slice explicitly instead of using eval.
            subname = eval('f' + args.uniq)
            if subname in analyzed:
                continue
            analyzed.append(subname)

        s = RNAStructure(f)
        if not s.is_pdb():
            # Diagnostics go to stderr so they do not pollute piped output.
            print('Error: Not a PDB file %s' % f, file=sys.stderr)
            sys.exit(1)

        # Clean-up steps provided by RNAStructure; the original call order
        # is preserved.
        s.decap_gtp()
        s.std_resn()
        s.remove_hydrogen()
        s.remove_ion()
        s.remove_water()
        if args.renum_atoms:
            s.renum_atoms()
        s.fix_O_in_UC()
        s.fix_op_atoms()

        # Label used in the '# <name>' annotations (with # it is easier to
        # grep this out).
        label = os.path.basename(f.replace('.pdb', ''))

        if args.fasta:
            # s.fn vs s.name
            output = s.get_seq(compact=args.compact, chainfirst=args.chain_first,
                               fasta=args.fasta, addfn=s.name,
                               color=args.color_seq) + '\n'
        elif args.oneline:
            output = s.get_seq(compact=args.compact, chainfirst=args.chain_first,
                               color=args.color_seq).strip() + ' # ' + label + '\n'
        else:
            output = '# ' + label + '\n'
            output += s.get_seq(compact=args.compact, chainfirst=args.chain_first,
                                color=args.color_seq) + '\n'

        try:
            sys.stdout.write(output)
            sys.stdout.flush()
        except IOError:
            # Deliberate best-effort: presumably tolerates a closed pipe
            # (e.g. `... | head`) -- confirm before tightening.
            pass

        if args.save_to_file:
            # Imported lazily: only needed for this option.
            from rna_pdb_fetch_header import fetch_pdb_header

            # PDB id = file name up to the first '_', without the extension
            # (previously '1xjr.pdb' produced the id '1xjr.pdb').
            base = os.path.splitext(os.path.basename(f))[0]
            pdb_id = base.split('_')[0]
            header_info = fetch_pdb_header(pdb_id)
            title = header_info.get('entry', {}).get('struct', {}).get('title', 'Title not found')
            output = '# ' + title + '\n' + output

            # Build the output path from the extension only.  The former
            # f.replace('.pdb', '.txt') returned the input path unchanged for
            # names without '.pdb' (overwriting the input structure) and also
            # rewrote '.pdb' inside directory names.
            root, ext = os.path.splitext(f)
            out_fn = (root if ext.lower() == '.pdb' else f) + '.txt'
            with open(out_fn, 'w') as out:
                out.write(output)
            print(output)
134
135
136