build.rna_tools.tools.rna_alignment.rna_align_coverage.get_parser() - Code Metrics - Inspection of "rna_align_coverage.py" - mmagnus/rna-tools - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 7a6481...9f3416 )

by Marcin

created 2024-12-13 23:44 UTC

get_parser() A

↳ Parent: build.rna_tools.tools.rna_alignment.rna_align_coverage

Complexity

Conditions

Size

Total Lines	8
Code Lines	8

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	8
nop	0
dl	0
loc	8
rs	10
c	0
b	0
f	0

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

"""
from Bio import AlignIO
import argparse
from icecream import ic
import sys
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
ic.configureOutput(prefix='> ')


def get_parser():
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-v", "--verbose",
                        action="store_true", help="be verbose")
    parser.add_argument("sequence", help="", default="") # nargs='+')
    parser.add_argument("stk", help="", default="") # nargs='+')
    return parser


if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()

    verbose = args.verbose
    
    # Load the alignment
    alignment = AlignIO.read(args.stk, "stockholm")
    alignment_name = args.stk.split('/')[-1]
    # Define the target sequence ID
    target_id = args.sequence

    
    # Find the target sequence
    target_seq = None
    for record in alignment:
        if record.id == target_id:
            target_seq = record.seq
            break

    if target_seq is None:
        print(f"Target sequence '{target_id}' not found in the alignment.")
    else:
        seq_len = 0
        cols_no_coverage = 0
        # Collect columns where the target sequence has no gaps
        filtered_columns = []
        for col_index in range(alignment.get_alignment_length()):
            if target_seq[col_index] != "-":
                column = [record.seq[col_index] for record in alignment]
                filtered_columns.append(column)
                seq_len += 1

        # Display filtered columns
        if verbose: print(f"Columns where the target sequence '{target_id}' has no gaps:")
        for col in filtered_columns:
            if verbose: print(col, len(col))
            # Count the number of '-'
            gap_count = col.count('-')
            if verbose: print(f"Number of '-': {gap_count}")
            ratio_per_col = gap_count/(len(col) - 1)
            if verbose: print(f'ratio of gaps: {ratio_per_col}') # -1 target_sequence
            if ratio_per_col > 0.5:
                if verbose: print('WARNING: more than 50% gaps in column', cols_no_coverage)
                cols_no_coverage +=1
            if verbose: print()

        ratio = round((1 - (cols_no_coverage / seq_len)) * 100)
        print(f"Query {target_id} length {seq_len} in the alignment {alignment_name} number of columns with low coverage (<50%) for the query sequence {cols_no_coverage}; coverge for the query: {ratio}%") 


1			#!/usr/bin/env python
2			# -- coding: utf-8 --
3			"""
4
5			"""
6			from Bio import AlignIO
7			import argparse
8			from icecream import ic
9			import sys
10			ic.configureOutput(outputFunction=lambda a: print(a, file=sys.stderr))
11			ic.configureOutput(prefix='> ')
12
13
14			def get_parser():
15			parser = argparse.ArgumentParser(
16			description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
17			parser.add_argument("-v", "--verbose",
18			action="store_true", help="be verbose")
19			parser.add_argument("sequence", help="", default="") # nargs='+')
20			parser.add_argument("stk", help="", default="") # nargs='+')
21			return parser
22
23
24			if __name__ == '__main__':
25			parser = get_parser()
26			args = parser.parse_args()
27
28			verbose = args.verbose
29
30			# Load the alignment
31			alignment = AlignIO.read(args.stk, "stockholm")
32			alignment_name = args.stk.split('/')[-1]
33			# Define the target sequence ID
34			target_id = args.sequence
35
36
37			# Find the target sequence
38			target_seq = None
39			for record in alignment:
40			if record.id == target_id:
41			target_seq = record.seq
42			break
43
44			if target_seq is None:
45			print(f"Target sequence '{target_id}' not found in the alignment.")
46			else:
47			seq_len = 0
48			cols_no_coverage = 0
49			# Collect columns where the target sequence has no gaps
50			filtered_columns = []
51			for col_index in range(alignment.get_alignment_length()):
52			if target_seq[col_index] != "-":
53			column = [record.seq[col_index] for record in alignment]
54			filtered_columns.append(column)
55			seq_len += 1
56
57			# Display filtered columns
58			if verbose: print(f"Columns where the target sequence '{target_id}' has no gaps:")
59			for col in filtered_columns:
60			if verbose: print(col, len(col))
61			# Count the number of '-'
62			gap_count = col.count('-')
63			if verbose: print(f"Number of '-': {gap_count}")
64			ratio_per_col = gap_count/(len(col) - 1)
65			if verbose: print(f'ratio of gaps: {ratio_per_col}') # -1 target_sequence
66			if ratio_per_col > 0.5:
67			if verbose: print('WARNING: more than 50% gaps in column', cols_no_coverage)
68			cols_no_coverage +=1
69			if verbose: print()
70
71			ratio = round((1 - (cols_no_coverage / seq_len)) * 100)
72			print(f"Query {target_id} length {seq_len} in the alignment {alignment_name} number of columns with low coverage (<50%) for the query sequence {cols_no_coverage}; coverge for the query: {ratio}%")
73

mmagnus / rna-tools

Push — master ( 7a6481...9f3416 )

get_parser() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like