Diff.getDifferences()   F
last analyzed

Complexity

Conditions 13

Size

Total Lines 34

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 13
dl 0
loc 34
rs 2.7716
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like Diff.getDifferences() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

from os.path import basename


class Diff(object):
    """Difference between two files.

    This represents differences in provenance between two files.

    See :py:mod:`niprov.comparing`

    Args:
        file1 (:class:`.BaseFile`): One of two niprov BaseFile objects to
            compare.
        file2 (:class:`.BaseFile`): As file1
    """

    NCHARSCOL = 20              # width of columns
    defaultIgnore = ['_id']     # ignored unless `select` is used

    def __init__(self, file1, file2):
        self.file1 = file1
        self.file2 = file2

    def getDifferences(self, ignore=None, select=None):
        """Get dictionary with fields that differ and how they differ.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences. The fields in ``defaultIgnore`` are
                ignored in addition to these. The passed list is not
                modified.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored. A non-empty
                ``select`` takes precedence over ``ignore`` and
                ``defaultIgnore``.

        Returns:
            dict: A dictionary with provenance fields as keys and strings
                indicating how they differ: 'missingIn1', 'missingIn2' or
                'value'.

        Raises:
            AssertionError: If ``ignore`` is neither a list nor None.
        """
        # Raised explicitly (rather than via ``assert``) so that the check
        # is not stripped when Python runs with -O; the exception type is
        # kept for backward compatibility.
        if not (ignore is None or isinstance(ignore, list)):
            raise AssertionError('ignore must be a list or None')
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        keys1 = set(prov1)
        keys2 = set(prov2)
        if select:
            # Selection wins: everything not selected is ignored.
            ignored = (keys1 | keys2).difference(select)
        else:
            # Build a new set so the caller's list is never mutated.
            ignored = set(ignore or ()).union(self.defaultIgnore)
        diffDict = {}
        for k in (keys1 - keys2) - ignored:
            diffDict[k] = 'missingIn2'
        for k in (keys2 - keys1) - ignored:
            diffDict[k] = 'missingIn1'
        for k in (keys1 & keys2) - ignored:
            if prov1[k] != prov2[k]:
                diffDict[k] = 'value'
        return diffDict

    def getSame(self):
        """Get dictionary with fields that have equal values.

        Only fields present in both files are considered; no fields are
        ignored here.

        Returns:
            dict: A dictionary with provenance fields as keys and the string
                  'same' as value.
        """
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        sameDict = {}
        for k in set(prov1).intersection(prov2):
            if prov1[k] == prov2[k]:
                sameDict[k] = 'same'
        return sameDict

    def getDifferenceString(self, ignore=None, select=None):
        """Get table of differences as string.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            str: A three-columns table listing provenance fields and their
                respective values for the two files. Empty string if there
                are no differences.
        """
        differences = self.getDifferences(ignore, select)
        return self._tableStringFromDiffDict(differences)

    def getSameString(self):
        """Get table of values that are the same for the compared files.

        Returns:
            str: A three-columns table listing provenance fields and their
                respective values for the two files. Empty string if no
                fields are the same.
        """
        same = self.getSame()
        return self._tableStringFromDiffDict(same)

    def _tableStringFromDiffDict(self, diffDict):
        """Render the fields in diffDict as a fixed-width three-column table.

        Only the keys of ``diffDict`` are used; the values shown are looked
        up in the provenance of both files ('n/a' where a field is absent).
        Every cell is truncated and padded to ``NCHARSCOL`` characters.
        The table title is always 'Differences:', whichever caller asks.

        Args:
            diffDict (dict): Dictionary with provenance fields as keys.

        Returns:
            str: The table, or an empty string if diffDict is empty.
        """
        if not diffDict:
            return ''
        name1 = basename(str(self.file1.location))
        name2 = basename(str(self.file2.location))
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        width = self.NCHARSCOL

        def row(*vals):
            return ' '.join(c[:width].ljust(width) for c in vals) + '\n'

        lines = ['Differences:\n', row('', name1, name2)]
        for field in diffDict:
            val1 = prov1.get(field, 'n/a')
            val2 = prov2.get(field, 'n/a')
            lines.append(row(field, str(val1), str(val2)))
        return ''.join(lines)

    def areEqual(self, ignore=None, select=None):
        """Whether there are any differences between the files.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            bool: True if no differences, False otherwise.
        """
        return not self.getDifferences(ignore, select)

    def areEqualProtocol(self):
        """Whether there are any differences for protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields of file1 are used.

        Returns:
            bool: True if no differences, False otherwise.
        """
        return self.areEqual(select=self.file1.getProtocolFields())

    def assertEqual(self, ignore=None, select=None):
        """Raises exception if there are differences.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Raises:
            AssertionError: Message with differences in a table.
        """
        differences = self.getDifferenceString(ignore, select)
        if differences:
            raise AssertionError(differences)

    def assertEqualProtocol(self):
        """Raises exception if there are differences in protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields of file1 are used.

        Raises:
            AssertionError: Message with protocol differences in a table.
        """
        self.assertEqual(select=self.file1.getProtocolFields())

    def __str__(self):
        return self.getDifferenceString()