Completed
Pull Request — master (#132)
by Jasper
01:06
created

Diff.getSame()   A

Complexity

Conditions 3

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 3
dl 0
loc 8
rs 9.4285
1
from os.path import basename
2
3
class Diff(object):
    """Difference between two files.

    This represents differences in provenance between two files.

    See :py:mod:`niprov.comparing`

    Args:
        file1 (:class:`.BaseFile`): One of two niprov BaseFile objects to
            compare.
        file2 (:class:`.BaseFile`): As file1
    """

    NCHARSCOL = 20              # width of columns
    defaultIgnore = ['_id']     # these fields are always ignored

    def __init__(self, file1, file2):
        self.file1 = file1
        self.file2 = file2

    def getDifferences(self, ignore=None, select=None):
        """Get dictionary with fields that differ and how they differ.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences. The list passed in is not modified.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored. When given, it
                replaces ``ignore`` (including the default ignored fields).

        Returns:
            dict: A dictionary with provenance fields as keys and strings
                indicating how they differ: 'missingIn1', 'missingIn2' or
                'value'.
        """
        assert isinstance(ignore, list) or ignore is None
        # Build a fresh list rather than using ``+=`` so that the caller's
        # list is never mutated (it used to grow by '_id' on every call).
        ignore = list(ignore or []) + self.defaultIgnore
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        keys1 = set(prov1)
        keys2 = set(prov2)
        if select:
            # Set union works on Python 2 and 3; concatenating the results
            # of dict.keys() raises TypeError on Python 3.
            ignore = [k for k in keys1 | keys2 if k not in select]
        diffDict = {}
        for k in keys1 - keys2:
            if k not in ignore:
                diffDict[k] = 'missingIn2'
        for k in keys2 - keys1:
            if k not in ignore:
                diffDict[k] = 'missingIn1'
        for k in keys1 & keys2:
            if k not in ignore and prov1[k] != prov2[k]:
                diffDict[k] = 'value'
        return diffDict

    def getSame(self):
        """Get dictionary with fields that have equal values in both files.

        Only fields present in both provenance records are considered; no
        fields are ignored here.

        Returns:
            dict: A dictionary with provenance fields as keys and the string
                'same' as value.
        """
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        sameDict = {}
        for k in set(prov1).intersection(prov2):
            if prov1[k] == prov2[k]:
                sameDict[k] = 'same'
        return sameDict

    def getDifferenceString(self, ignore=None, select=None):
        """Get table of differences as string.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            str: A three-columns table listing provenance fields and their
                respective values for the two files. Empty string if there
                are no differences.
        """
        differences = self.getDifferences(ignore, select)
        return self._tableStringFromDiffDict(differences)

    def getSameString(self):
        """Get table of fields with equal values as string.

        Returns:
            str: A three-columns table listing the fields returned by
                getSame() and their values for the two files. Empty string
                if no fields are the same.
        """
        same = self.getSame()
        return self._tableStringFromDiffDict(same)

    def _tableStringFromDiffDict(self, diffDict):
        """Format the fields in diffDict as a fixed-width three-column table.

        Args:
            diffDict (dict): Dictionary with provenance fields as keys. Only
                the keys are used.

        Returns:
            str: Empty string if diffDict is empty. Otherwise a header line,
                a line with the two file names, and one line per field with
                its value in each file ('n/a' if absent). Cells are truncated
                and padded to NCHARSCOL characters.
        """
        if not diffDict:
            return ''
        name1 = basename(str(self.file1.location))
        name2 = basename(str(self.file2.location))
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        width = self.NCHARSCOL

        def row(*vals):
            # Truncate, then pad, so every cell is exactly `width` wide.
            return ' '.join(c[:width].ljust(width) for c in vals) + '\n'

        # NOTE: the header reads 'Differences:' also when called through
        # getSameString(); kept as-is since it is part of the output.
        lines = ['Differences:\n', row('', name1, name2)]
        for field in diffDict:
            val1 = prov1.get(field, 'n/a')
            val2 = prov2.get(field, 'n/a')
            lines.append(row(field, str(val1), str(val2)))
        return ''.join(lines)

    def areEqual(self, ignore=None, select=None):
        """Whether there are any differences between the files.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            bool: True if no differences, False otherwise.
        """
        return not self.getDifferences(ignore, select)

    def areEqualProtocol(self):
        """Whether there are any differences for protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields are taken from file1.

        Returns:
            bool: True if no differences, False otherwise.
        """
        protocol = self.file1.getProtocolFields()
        return not self.getDifferences(select=protocol)

    def assertEqual(self, ignore=None, select=None):
        """Raises exception if there are differences.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Raises:
            AssertionError: Message with differences in a table.
        """
        differences = self.getDifferenceString(ignore, select)
        if differences:
            raise AssertionError(differences)

    def assertEqualProtocol(self):
        """Raises exception if there are differences in protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields are taken from file1.

        Raises:
            AssertionError: Message with protocol differences in a table.
        """
        protocol = self.file1.getProtocolFields()
        differences = self.getDifferenceString(select=protocol)
        if differences:
            raise AssertionError(differences)

    def __str__(self):
        return self.getDifferenceString()
166