Passed
Push — master ( 7a6481...9f3416 )
by Marcin
02:27
created

get_parser()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 8
nop 0
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""
4
5
"""
6
from Bio import AlignIO
7
import argparse
8
from icecream import ic
9
import sys
10
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
11
ic.configureOutput(prefix='> ')
12
13
14
def get_parser():
15
    parser = argparse.ArgumentParser(
16
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
17
    parser.add_argument("-v", "--verbose",
18
                        action="store_true", help="be verbose")
19
    parser.add_argument("sequence", help="", default="") # nargs='+')
20
    parser.add_argument("stk", help="", default="") # nargs='+')
21
    return parser
22
23
24
if __name__ == '__main__':
25
    parser = get_parser()
26
    args = parser.parse_args()
27
28
    verbose = args.verbose
29
    
30
    # Load the alignment
31
    alignment = AlignIO.read(args.stk, "stockholm")
32
    alignment_name = args.stk.split('/')[-1]
33
    # Define the target sequence ID
34
    target_id = args.sequence
35
36
    
37
    # Find the target sequence
38
    target_seq = None
39
    for record in alignment:
40
        if record.id == target_id:
41
            target_seq = record.seq
42
            break
43
44
    if target_seq is None:
45
        print(f"Target sequence '{target_id}' not found in the alignment.")
46
    else:
47
        seq_len = 0
48
        cols_no_coverage = 0
49
        # Collect columns where the target sequence has no gaps
50
        filtered_columns = []
51
        for col_index in range(alignment.get_alignment_length()):
52
            if target_seq[col_index] != "-":
53
                column = [record.seq[col_index] for record in alignment]
54
                filtered_columns.append(column)
55
                seq_len += 1
56
57
        # Display filtered columns
58
        if verbose: print(f"Columns where the target sequence '{target_id}' has no gaps:")
59
        for col in filtered_columns:
60
            if verbose: print(col, len(col))
61
            # Count the number of '-'
62
            gap_count = col.count('-')
63
            if verbose: print(f"Number of '-': {gap_count}")
64
            ratio_per_col = gap_count/(len(col) - 1)
65
            if verbose: print(f'ratio of gaps: {ratio_per_col}') # -1 target_sequence
66
            if ratio_per_col > 0.5:
67
                if verbose: print('WARNING: more than 50% gaps in column', cols_no_coverage)
68
                cols_no_coverage +=1
69
            if verbose: print()
70
71
        ratio = round((1 - (cols_no_coverage / seq_len)) * 100)
72
        print(f"Query {target_id} length {seq_len} in the alignment {alignment_name} number of columns with low coverage (<50%) for the query sequence {cols_no_coverage}; coverge for the query: {ratio}%") 
73