ReadGoids._read_txt() - Code Metrics - tanghaibao/goatools - Measure and Improve Code Quality continuously with Scrutinizer

ReadGoids._read_txt() D
last analyzed 2018-07-02 19:48 UTC

↳ Parent: ReadGoids

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	13
c	1
b	0
f	0
dl	0
loc	26
rs	4.2

How to fix Complexity

"""Functions to read text or tsv files containing GO IDs and sections of GO IDs."""

from __future__ import print_function

import os
import sys
import re
from goatools.gosubdag.go_tasks import chk_goids
from goatools.grouper.hdrgos import HdrgosSections
from goatools.grouper.grprobj import Grouper
from goatools.grouper.tasks import SummarySec2dHdrGos

__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, All rights reserved."
__author__ = "DV Klopfenstein"


def read_sections(sections_file, exclude_ungrouped=False, prt=sys.stdout):
    """Return the sections stored in a sections file, if there are any.

    Args:
        sections_file: Path of the file to read, or None.
        exclude_ungrouped: Forwarded to read_goids.
        prt: Stream that receives the "CANNOT READ" message when the file
            does not exist. A falsy value suppresses that message.

    Returns:
        The value stored under 'sections' in the dict returned by read_goids.
        None is returned when sections_file is None, when the file does not
        exist, when read_goids returns nothing, or when the file holds a
        plain list of GO IDs rather than sections.
    """
    if sections_file is None:
        return None
    if not os.path.exists(sections_file):
        if prt:
            prt.write("CANNOT READ: {SEC}\n".format(SEC=sections_file))
        return None
    file_contents = read_goids(sections_file, False, exclude_ungrouped)
    # read_goids returns None when no GO IDs could be read from the file;
    # calling .get() on that result would raise AttributeError.
    if file_contents is None:
        return None
    return file_contents.get('sections', None)

def read_goids(fin_txt, get_goids_only=False, exclude_ungrouped=False, prt=sys.stdout):
    """Read GO IDs, and any sections grouping them, from the text file fin_txt.

    Convenience wrapper: builds a fresh ReadGoids reader and returns whatever
    its read_txt method returns for the given arguments.
    """
    reader = ReadGoids()
    return reader.read_txt(
        fin_txt,
        get_goids_only=get_goids_only,
        exclude_ungrouped=exclude_ungrouped,
        prt=prt)


class ReadGoids(object):
    """Read GO IDs, optionally grouped into named sections, from a text file.

    A GO ID is taken from the first 10 characters of every line that starts
    with "GO:". A line of the form "SECTION: name" (optionally preceded by
    "#", case-insensitive) starts a new section; GO IDs that follow belong to
    that section until the next section line.
    """

    # Section header line. The name may be a single character; leading and
    # trailing whitespace around the name is not part of the captured group.
    srch_section = re.compile(r'^#?\s*SECTION:\s*(\S(?:.*\S)?)\s*$', flags=re.IGNORECASE)

    def __init__(self):
        # GO IDs read from a file in which no section was recorded
        self.goids_fin = []
        # Section names in the order in which their header lines were read
        self.sections_seen = []
        # Section name -> list of GO IDs; sections without GO IDs are absent
        self.section2goids = {}

    def read_txt(self, fin_txt, get_goids_only, exclude_ungrouped, prt=sys.stdout):
        """Read the file fin_txt and return its GO IDs or its sections.

        Args:
            fin_txt: Path of the text file to read.
            get_goids_only: If true, section header lines are ignored and all
                GO IDs in the file are returned as one flat list.
            exclude_ungrouped: If true, the section named
                HdrgosSections.secdflt is not recorded as a section.
            prt: Stream for all messages; None silences them.

        Returns:
            {'goids': [...]} when no section was recorded,
            {'sections': [(name, [goids]), ...]} when sections were recorded,
            or None when nothing usable was read.
        """
        goids_fin = self._read_txt(fin_txt, get_goids_only, exclude_ungrouped)
        # Report unused sections, if any (nothing to report to if prt is None)
        if prt is not None and len(self.section2goids) != len(self.sections_seen):
            self._rpt_unused_sections(prt)
        # If there are no sections, then goids_fin holds all GO IDs in file
        if not self.sections_seen:
            self.goids_fin = goids_fin
        # Print summary of GO IDs read
        if prt is not None:
            self._prt_read_msg(prt, fin_txt, exclude_ungrouped)

        # goids_fin only holds the GO IDs that follow the last section header,
        # so also accept a file whose last section is empty while earlier
        # sections are populated.
        if goids_fin or self.section2goids:
            return self.internal_get_goids_or_sections()
        if prt is not None:
            prt.write(
                "\n**WARNING: GO IDs MUST BE THE FIRST 10 CHARACTERS OF EACH LINE\n\n")
        return None

    def _read_txt(self, fin_txt, get_goids_only, exclude_ungrouped):
        """Read GO file. Store results in: section2goids sections_seen.

        Returns the GO IDs read after the last section header, which is every
        GO ID in the file when the file has no section header.
        """
        goids_sec = []
        section_name = None
        with open(fin_txt) as istrm:
            # Lines starting with a GO ID will have that GO ID read and stored.
            #   * Lines that do not start with a GO ID will be ignored.
            #   * Text after the 10 characters in a GO ID will be ignored.
            for line in istrm:
                if line[:3] == "GO:":
                    goids_sec.append(line[:10])
                    continue
                # Cheap pre-filter: a section header always contains ":"
                if get_goids_only or ":" not in line:
                    continue
                mtch = self.srch_section.match(line)
                if mtch is None:
                    continue
                # A new header closes the section that was being filled
                self._store_section(section_name, goids_sec)
                section_name = self._start_section(mtch.group(1), exclude_ungrouped)
                goids_sec = []
        self._store_section(section_name, goids_sec)
        return goids_sec

    def _store_section(self, section_name, goids_sec):
        """Save a finished section, unless it is unnamed or has no GO IDs."""
        if section_name is not None and goids_sec:
            self.section2goids[section_name] = goids_sec

    def _start_section(self, secstr, exclude_ungrouped):
        """Record a section header; return its name, or None if it is excluded."""
        if exclude_ungrouped and secstr == HdrgosSections.secdflt:
            return None
        self.sections_seen.append(secstr)
        return secstr

    def _rpt_unused_sections(self, prt):
        """Report each section that has no GO IDs, once, in the order seen."""
        reported = set()
        for sec in self.sections_seen:
            if sec in self.section2goids or sec in reported:
                continue
            reported.add(sec)
            prt.write("  UNUSED SECTION: {SEC}\n".format(SEC=sec))

    def internal_get_goids_or_sections(self):
        """Return {'goids': [...]} or {'sections': [(name, goids), ...]}.

        The flat list of GO IDs is returned when it is not empty. Otherwise
        the populated sections are returned in the order their headers were
        read. Every returned list is first passed to chk_goids.
        """
        if self.goids_fin:
            chk_goids(self.goids_fin, "read_goids")
            return {'goids' : self.goids_fin}
        # Convert dict into 2D list retaining original section order
        sections_2d = []
        for section_name in self.sections_seen:
            if section_name in self.section2goids:
                goids = self.section2goids[section_name]
                chk_goids(goids, "GO IDs IN SECTION({S})".format(S=section_name))
                sections_2d.append((section_name, goids))
        return {'sections' : sections_2d}

    def _prt_read_msg(self, prt, fin_txt, exclude_ungrouped):
        """Print which file was read and the number of GO IDs found."""
        if self.sections_seen or exclude_ungrouped:
            dat = SummarySec2dHdrGos().summarize_sec2hdrgos(self.section2goids.items())
            # Write to the caller's stream, as the other messages do
            prt.write(Grouper.fmtsum.format(
                GO_DESC='hdr', SECs=len(dat['S']), GOs=len(dat['G']),
                UNGRP="N/A", undesc="unused", ACTION="READ: ", FILE=fin_txt))
        elif self.goids_fin:
            prt.write("  {G} GO IDs READ: {FIN}\n".format(G=len(self.goids_fin), FIN=fin_txt))


# Copyright (C) 2016-2018, DV Klopfenstein, All rights reserved.


1			"""Functions to read text or tsv files containing GO IDs and sections of GO IDs."""
2
3			from __future__ import print_function
4
5			import os
6			import sys
7			import re
8			from goatools.gosubdag.go_tasks import chk_goids
9			from goatools.grouper.hdrgos import HdrgosSections
10			from goatools.grouper.grprobj import Grouper
11			from goatools.grouper.tasks import SummarySec2dHdrGos
12
13			__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, All rights reserved."
14			__author__ = "DV Klopfenstein"
15
16
17			def read_sections(sections_file, exclude_ungrouped=False, prt=sys.stdout):
18			"""Get sections and GO grouping hdrgos from file, if sections exist."""
19			if sections_file is None:
20			return None
21			if os.path.exists(sections_file):
22			file_contents = read_goids(sections_file, False, exclude_ungrouped)
23			return file_contents.get('sections', None)
24			if prt:
25			prt.write("CANNOT READ: {SEC}\n".format(SEC=sections_file))
26
27			def read_goids(fin_txt, get_goids_only=False, exclude_ungrouped=False, prt=sys.stdout):
28			"""Get user list of GO IDs either from a list or from GO IDs on the command-line"""
29			return ReadGoids().read_txt(fin_txt, get_goids_only, exclude_ungrouped, prt)
30
31
32			class ReadGoids(object):
33			"""Get user list of GO IDs either from a list or from GO IDs on the command-line"""
34
35			srch_section = re.compile(r'^#?\s*SECTION:\s*(\S.*\S)\s*$', flags=re.IGNORECASE)
36
37			def __init__(self):
38			self.goids_fin = []
39			self.sections_seen = []
40			self.section2goids = {}
41
42			def read_txt(self, fin_txt, get_goids_only, exclude_ungrouped, prt=sys.stdout):
43			"""Get user list of GO IDs either from a list or from GO IDs on the command-line"""
44			goids_fin = self._read_txt(fin_txt, get_goids_only, exclude_ungrouped)
45			# Report unused sections, if any
46			if len(self.section2goids) != len(self.sections_seen):
47			self._rpt_unused_sections(prt)
48			# If there are no sections, then goids_fin holds all GO IDs in file
49			if not self.sections_seen:
50			self.goids_fin = goids_fin
51			# Print summary of GO IDs read
52			if prt is not None:
53			self._prt_read_msg(prt, fin_txt, exclude_ungrouped)
54
55			if goids_fin:
56			return self.internal_get_goids_or_sections()
57			else:
58			sys.stdout.write(
59			"\n**WARNING: GO IDs MUST BE THE FIRST 10 CHARACTERS OF EACH LINE\n\n")
60
61			def _read_txt(self, fin_txt, get_goids_only, exclude_ungrouped):
62			"""Read GO file. Store results in: section2goids sections_seen. Return goids_fin."""
63			goids_sec = []
64			with open(fin_txt) as istrm:
65			# Lines starting with a GO ID will have that GO ID read and stored.
66			# * Lines that do not start with a GO ID will be ignored.
67			# * Text after the 10 characters in a GO ID will be ignored.
68			section_name = None
69			for line in istrm:
70			if line[:3] == "GO:":
71			goids_sec.append(line[:10])
72			elif not get_goids_only and ":" in line:
73			mtch = self.srch_section.match(line)
74			if mtch:
75			secstr = mtch.group(1)
76			if section_name is not None and goids_sec:
77			self.section2goids[section_name] = goids_sec
78			if not exclude_ungrouped or secstr != HdrgosSections.secdflt:
79			section_name = secstr
80			self.sections_seen.append(section_name)
81			else:
82			section_name = None
83			goids_sec = []
84			if section_name is not None and goids_sec:
85			self.section2goids[section_name] = goids_sec
86			return goids_sec
87
88			def _rpt_unused_sections(self, prt):
89			"""Report unused sections."""
90			sections_unused = set(self.sections_seen).difference(self.section2goids.keys())
91			for sec in sections_unused:
92			prt.write(" UNUSED SECTION: {SEC}\n".format(SEC=sec))
93
94			def internal_get_goids_or_sections(self):
95			"""Return GO IDs, Sections/GOs, or None."""
96			if self.goids_fin:
97			chk_goids(self.goids_fin, "read_goids")
98			return {'goids' : self.goids_fin}
99			else:
100			# Convert dict into 2D list retaining original section order
101			sections_2d = []
102			for section_name in self.sections_seen:
103			if section_name in self.section2goids:
104			goids = self.section2goids.get(section_name)
105			chk_goids(goids, "GO IDs IN SECTION({S})".format(S=section_name))
106			sections_2d.append((section_name, goids))
107			return {'sections' : sections_2d}
108
109			def _prt_read_msg(self, prt, fin_txt, exclude_ungrouped):
110			"""Print which file was read and the number of GO IDs found."""
111			if self.sections_seen or exclude_ungrouped:
112			# dat = Grouper.get_summary_data(self.section2goids.items(), HdrgosSections.secdflt)
113			dat = SummarySec2dHdrGos().summarize_sec2hdrgos(self.section2goids.items())
114			sys.stdout.write(Grouper.fmtsum.format(
115			GO_DESC='hdr', SECs=len(dat['S']), GOs=len(dat['G']),
116			UNGRP="N/A", undesc="unused", ACTION="READ: ", FILE=fin_txt))
117			elif self.goids_fin:
118			prt.write(" {G} GO IDs READ: {FIN}\n".format(G=len(self.goids_fin), FIN=fin_txt))
119
120
121			# Copyright (C) 2016-2018, DV Klopfenstein, All rights reserved.
122

tanghaibao / goatools

ReadGoids._read_txt() D last analyzed 2018-07-02 19:48 UTC

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like

ReadGoids._read_txt() D
last analyzed 2018-07-02 19:48 UTC