StrongIndicatorList   A
last analyzed

Complexity

Total Complexity 1

Size/Duplication

Total Lines 9
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 9
rs 10
wmc 1

1 Method

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 5 1
"""Package containing lists of indicators

.. Authors:
    Philippe Dessauw
    [email protected]

.. Sponsor:
    Alden Dima
    [email protected]
    Information Systems Group
    Software and Systems Division
    Information Technology Laboratory
    National Institute of Standards and Technology
    http://www.nist.gov/itl/ssd/is
"""
16
from __future__ import division
17
from denoiser.models.indicators import *
18
19
20
class IndicatorsList(object):
    """Object handling a list of indicators that share the same purpose

    Indicators are stored in insertion order in ``self.indicators`` and are
    queried together through `match` and `match_rate`.
    """

    def __init__(self):
        # Registered indicators, in the order they were added
        self.indicators = []

    def add_indicator(self, indicator):
        """Add an indicator to the list

        Args:
            indicator (Indicator): Indicator to add to the list
        """
        self.indicators.append(indicator)

    def set_stats(self, stats):
        """Set stats for all the StatsIndicator

        Args:
            stats (`Statistics`): Text statistics to setup
        """
        for indicator in self.indicators:
            # isinstance also covers indicators that inherit from
            # StatsIndicator indirectly, which a comparison against the
            # direct base class would silently skip.
            if isinstance(indicator, StatsIndicator):
                indicator.set_stats(stats)

    def match(self, line):
        """Define if a line is matching the indicators

        Args:
            line (`Line`): Input line

        Returns:
            bool: True if line match at least one indicator
        """
        return self.match_rate(line) > 0

    def match_rate(self, line):
        """Get the ratio of match of a line

        Args:
            line (Line): Input line

        Returns:
            float: Number of matching indicators / number of indicators,
                or 0.0 when no indicator has been registered
        """
        total_ind = len(self.indicators)

        # An empty list cannot match anything; return 0.0 instead of
        # raising ZeroDivisionError.
        if total_ind == 0:
            return 0.0

        matching_ind = sum(1 for indicator in self.indicators
                           if indicator.match(line))

        # True division: the module imports `division` from __future__
        return matching_ind / total_ind
73
74
75
class StrongIndicatorList(IndicatorsList):
    """Indicator list pre-filled with the strong indicators (garbage strings)
    """

    def __init__(self):
        super(StrongIndicatorList, self).__init__()

        # Instantiate and register each strong indicator, in this order
        for indicator_cls in (AlphaNumIndicator, CardinalNumberIndicator):
            self.add_indicator(indicator_cls())
84
85
86
class CleanIndicatorList(IndicatorsList):
    """Indicator list pre-filled with the indicators detecting clean lines
    """

    def __init__(self):
        super(CleanIndicatorList, self).__init__()

        # Instantiate and register each clean-line indicator, in this order
        for indicator_cls in (CleanTextIndicator, TitleIndicator):
            self.add_indicator(indicator_cls())
95