|
1
|
|
|
from os.path import basename |
|
2
|
|
|
|
|
3
|
|
|
class Diff(object):
    """Difference between two files.

    This represents differences in provenance between two files.

    See :py:mod:`niprov.comparing`

    Args:
        file1 (:class:`.BaseFile`): One of two niprov BaseFile objects to
            compare.
        file2 (:class:`.BaseFile`): As file1
    """

    NCHARSCOL = 20  # width of columns
    defaultIgnore = ['_id']  # these fields are always ignored

    def __init__(self, file1, file2):
        self.file1 = file1
        self.file2 = file2

    def getDifferences(self, ignore=None, select=None):
        """Get dictionary with fields that differ and how they differ.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences. The fields in ``defaultIgnore``
                are always added to it. The list passed in is not modified.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored. A non-empty
                ``select`` takes precedence over ``ignore`` (including
                ``defaultIgnore``); an empty ``select`` is treated as absent.

        Returns:
            dict: A dictionary with provenance fields as keys and strings
                indicating how they differ: 'missingIn1', 'missingIn2' or
                'value'.
        """
        assert isinstance(ignore, list) or ignore is None
        # Build a new list instead of using ``+=`` so that the list object
        # owned by the caller is never extended as a side effect.
        ignore = list(ignore or []) + list(self.defaultIgnore)
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        keys1 = set(prov1.keys())
        keys2 = set(prov2.keys())
        if select:
            # Set union works for both Python 2 key lists and Python 3 key
            # views, whereas ``keys() + keys()`` only works on Python 2.
            ignore = [k for k in keys1 | keys2 if k not in select]
        diffDict = {}
        for k in keys1 - keys2:
            if k not in ignore:
                diffDict[k] = 'missingIn2'
        for k in keys2 - keys1:
            if k not in ignore:
                diffDict[k] = 'missingIn1'
        for k in keys1 & keys2:
            if k not in ignore and prov1[k] != prov2[k]:
                diffDict[k] = 'value'
        return diffDict

    def getSame(self):
        """Get dictionary with fields that have equal values.

        Only fields present in both files are considered; no fields are
        ignored here.

        Returns:
            dict: A dictionary with provenance fields as keys and the string
                'same' as value.
        """
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()
        sameDict = {}
        for k in set(prov1.keys()).intersection(prov2.keys()):
            if prov1[k] == prov2[k]:
                sameDict[k] = 'same'
        return sameDict

    def getDifferenceString(self, ignore=None, select=None):
        """Get table of differences as string.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            str: A three-columns table listing provenance fields and their
                respective values for the two files. Empty string if there
                are no differences.
        """
        differences = self.getDifferences(ignore, select)
        return self._tableStringFromDiffDict(differences)

    def getSameString(self):
        """Get table of values that are the same for the compared files.

        Returns:
            str: A three-columns table listing provenance fields and their
                respective values for the two files. Empty string if no
                field has the same value in both files.
        """
        same = self.getSame()
        return self._tableStringFromDiffDict(same)

    def _tableStringFromDiffDict(self, diffDict):
        """Render the fields named in diffDict as a fixed-width text table.

        Args:
            diffDict (dict): Dictionary whose keys are the provenance fields
                to list. The values are not used.

        Returns:
            str: A header line, a line with the two file names, and one line
                per field with its value in each file ('n/a' if the field is
                absent). Every cell is cut off at and padded to NCHARSCOL
                characters. Empty string if diffDict is empty.
        """
        if not diffDict:
            return ''
        width = self.NCHARSCOL
        name1 = basename(str(self.file1.location))
        name2 = basename(str(self.file2.location))
        prov1 = self.file1.getProvenance()
        prov2 = self.file2.getProvenance()

        def row(*vals):
            # Truncate first, then pad, so every column is exactly `width`.
            cells = [c[:width].ljust(width) for c in vals]
            return ' '.join(cells) + '\n'

        lines = ['Differences:\n', row('', name1, name2)]
        for field in diffDict:
            val1 = prov1.get(field, 'n/a')
            val2 = prov2.get(field, 'n/a')
            lines.append(row(field, str(val1), str(val2)))
        return ''.join(lines)

    def areEqual(self, ignore=None, select=None):
        """Whether there are any differences between the files.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Returns:
            bool: True if no differences, False otherwise.
        """
        differences = self.getDifferences(ignore, select)
        return len(differences) == 0

    def areEqualProtocol(self):
        """Whether there are any differences for protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields of file1 are used. If that list is empty, all
        fields are compared, as an empty selection counts as no selection.

        Returns:
            bool: True if no differences, False otherwise.
        """
        protocol = self.file1.getProtocolFields()
        differences = self.getDifferences(select=protocol)
        return len(differences) == 0

    def assertEqual(self, ignore=None, select=None):
        """Raises exception if there are differences.

        Args:
            ignore (list): Optional. List of fields not to evaluate when
                determining differences.
            select (list): Optional. List of fields that should be specifically
                evaluated. All other fields will be ignored.

        Raises:
            AssertionError: Message with differences in a table.
        """
        differences = self.getDifferenceString(ignore, select)
        if differences:
            raise AssertionError(differences)

    def assertEqualProtocol(self):
        """Raises exception if there are differences in protocol fields.

        Each :class:`.BaseFile` subtype has a getProtocolFields() method
        that is used here to selectively see if any of these are different.
        The protocol fields of file1 are used.

        Raises:
            AssertionError: Message with protocol differences in a table.
        """
        protocol = self.file1.getProtocolFields()
        differences = self.getDifferenceString(select=protocol)
        if differences:
            raise AssertionError(differences)

    def __str__(self):
        return self.getDifferenceString()
|
178
|
|
|
|