1
|
|
|
from os.path import basename |
2
|
|
|
|
3
|
|
|
class Diff(object):
    """Difference between two files.

    This represents differences in provenance between two files.

    See :py:mod:`niprov.comparing`

    Args:
        file1 (:class:`.BaseFile`): One of two niprov BaseFile objects to
            compare. It must provide ``getProvenance()``, ``location`` and
            (for the protocol comparisons) ``getProtocolFields()``.
        file2 (:class:`.BaseFile`): As file1
    """

    NCHARSCOL = 20              # width of columns
    defaultIgnore = ['_id']     # these fields are always ignored

    def __init__(self, file1, file2):
        self.file1 = file1
        self.file2 = file2

    def getDifferences(self, ignore=None, select=None):
        """Get dictionary with fields that differ and how they differ.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences. The fields in ``defaultIgnore`` are
                added to it. The list passed in is not modified.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored. When non-empty,
                this takes precedence over ``ignore``.

        Returns:
            dict: A dictionary with provenance fields as keys and strings
                indicating how they differ: 'missingIn2' for a field only
                present in file1, 'missingIn1' for a field only present in
                file2, and 'value' for a field present in both with unequal
                values.

        Raises:
            AssertionError: If ``ignore`` is neither a list nor None.
        """
        # Kept as an assert so callers relying on AssertionError still work.
        assert isinstance(ignore, list) or ignore is None
        # Build a new list rather than extending in place, so the caller's
        # list is not polluted with the default fields on every call.
        ignored = list(ignore or []) + list(self.defaultIgnore)
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        keys1 = set(prov1.keys())
        keys2 = set(prov2.keys())
        if select:
            # Set union works on both Python 2 and 3; concatenating the
            # results of keys() with '+' fails on Python 3.
            ignored = [k for k in keys1 | keys2 if k not in select]
        diffDict = {}
        for k in keys1 - keys2:
            if k not in ignored:
                diffDict[k] = 'missingIn2'
        for k in keys2 - keys1:
            if k not in ignored:
                diffDict[k] = 'missingIn1'
        for k in keys1 & keys2:
            if k not in ignored and prov1[k] != prov2[k]:
                diffDict[k] = 'value'
        return diffDict

    def getSame(self):
        """Get dictionary with fields that have equal values.

        Only fields present in both files are considered; no fields are
        ignored here.

        Returns:
            dict: A dictionary with provenance fields as keys and the string
                'same' as value.
        """
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        sameDict = {}
        for k in set(prov1.keys()).intersection(prov2.keys()):
            if prov1[k] == prov2[k]:
                sameDict[k] = 'same'
        return sameDict

    def getDifferenceString(self, ignore=None, select=None):
        """Get table of differences as string.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            str: A three-columns table listing provenance fields and their
                respective values for the two files, or an empty string if
                there are no differences.
        """
        differences = self.getDifferences(ignore, select)
        return self._tableStringFromDiffDict(differences)

    def getSameString(self):
        """Get table of values that are the same for the compared files.

        Returns:
            str: A three-columns table listing provenance fields and their
                respective values for the two files, or an empty string if
                no fields are equal.
        """
        same = self.getSame()
        return self._tableStringFromDiffDict(same)

    def _tableStringFromDiffDict(self, diffDict):
        """Render the fields named in diffDict as a fixed-width text table.

        Args:
            diffDict (dict): Dictionary whose keys are the provenance fields
                to list. The values are not used.

        Returns:
            str: A header line, a row with the two file names, and one row
                per field; an empty string if diffDict is empty.
        """
        if not diffDict:
            return ''
        name1 = basename(str(self.file1.location))
        name2 = basename(str(self.file2.location))
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        width = self.NCHARSCOL

        def row(*vals):
            # Truncate, then pad, so every cell is exactly `width` characters.
            cells = [c[:width].ljust(width) for c in vals]
            return ' '.join(cells)+'\n'

        lines = ['Differences:\n', row('', name1, name2)]
        for field in diffDict:
            # A field missing from one of the files is shown as 'n/a'.
            val1 = prov1.get(field, 'n/a')
            val2 = prov2.get(field, 'n/a')
            lines.append(row(field, str(val1), str(val2)))
        return ''.join(lines)

    def areEqual(self, ignore=None, select=None):
        """Whether there are any differences between the files.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            bool: True if no differences, False otherwise.
        """
        differences = self.getDifferences(ignore, select)
        return len(differences) == 0

    def areEqualProtocol(self):
        """Whether there are any differences for protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields are taken from file1.

        Returns:
            bool: True if no differences, False otherwise.
        """
        protocol = self.file1.getProtocolFields()
        differences = self.getDifferences(select=protocol)
        return len(differences) == 0

    def assertEqual(self, ignore=None, select=None):
        """Raises exception if there are differences.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Raises:
            AssertionError: Message with differences in a table.
        """
        differences = self.getDifferenceString(ignore, select)
        if differences:
            raise AssertionError(differences)

    def assertEqualProtocol(self):
        """Raises exception if there are differences in protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields are taken from file1.

        Raises:
            AssertionError: Message with protocol differences in a table.
        """
        protocol = self.file1.getProtocolFields()
        differences = self.getDifferenceString(select=protocol)
        if differences:
            raise AssertionError(differences)

    def __str__(self):
        """Return the table of differences computed with default settings."""
        return self.getDifferenceString()
178
|
|
|
|