|
1
|
|
|
from collections import defaultdict |
|
2
|
|
|
from itertools import product |
|
3
|
|
|
from math import sqrt, floor, ceil |
|
4
|
|
|
import random |
|
5
|
|
|
import sys |
|
6
|
|
|
|
|
7
|
|
|
from PyQt4.QtCore import Qt |
|
|
|
|
|
|
8
|
|
|
from PyQt4.QtGui import (QGraphicsScene, QGraphicsView, QColor, QPen, QBrush, |
|
|
|
|
|
|
9
|
|
|
QDialog, QApplication) |
|
10
|
|
|
|
|
11
|
|
|
|
|
12
|
|
|
import Orange |
|
13
|
|
|
from Orange.data import Table |
|
14
|
|
|
from Orange.data.sql.table import SqlTable, LARGE_TABLE, DEFAULT_SAMPLE_TIME |
|
15
|
|
|
from Orange.statistics.contingency import get_contingency |
|
16
|
|
|
from Orange.widgets import gui |
|
17
|
|
|
from Orange.widgets.utils import getHtmlCompatibleString |
|
18
|
|
|
from Orange.widgets.visualize.owmosaic import (OWCanvasText, OWCanvasRectangle, |
|
19
|
|
|
OWCanvasEllipse, OWCanvasLine) |
|
20
|
|
|
from Orange.widgets.widget import OWWidget, Default, AttributeList |
|
21
|
|
|
from Orange.widgets.io import FileFormats |
|
22
|
|
|
|
|
23
|
|
|
|
|
24
|
|
|
class OWSieveDiagram(OWWidget):
    """Widget that draws a sieve diagram for two discrete attributes.

    The diagram is a grid of rectangles.  The width of a column is
    proportional to the count of one X value, the height of a row to the
    count of one Y value.  Every rectangle is shaded according to the
    standardized Pearson residual of its cell: blue when more instances
    were observed than expected under independence, red when fewer.
    An optional condition (attribute = value) restricts the data first.
    """

    name = "Sieve Diagram"
    description = ("A two-way contingency table providing information in "
                   "relation to expected frequency of combination of feature "
                   "values under independence.")
    icon = "icons/SieveDiagram.svg"
    priority = 4200

    inputs = [("Data", Table, "setData", Default),
              ("Features", AttributeList, "setShownAttributes")]
    outputs = []

    settingsList = ["showLines", "showCases", "showInColor"]

    want_graph = True

    def __init__(self, parent=None, signalManager=None):
        """Build the canvas and the control-area GUI."""
        OWWidget.__init__(self, parent, signalManager, "Sieve diagram", True)

        # default settings
        self.data = None

        self.attrX = ""
        self.attrY = ""
        self.attrCondition = ""
        self.attrConditionValue = ""
        self.showLines = 1
        self.showCases = 0
        self.showInColor = 1
        self.attributeSelectionList = None
        self.stopCalculating = 0

        self.canvas = QGraphicsScene()
        self.canvasView = QGraphicsView(self.canvas, self.mainArea)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

        # GUI: attribute selection
        self.attrSelGroup = gui.widgetBox(self.controlArea,
                                          box="Shown attributes")

        self.attrXCombo = gui.comboBox(
            self.attrSelGroup, self, value="attrX", label="X attribute:",
            orientation="horizontal",
            tooltip="Select an attribute to be shown on the X axis",
            callback=self.updateGraph, sendSelectedValue=1, valueType=str,
            labelWidth=70, contentsLength=12)

        self.attrYCombo = gui.comboBox(
            self.attrSelGroup, self, value="attrY", label="Y attribute:",
            orientation="horizontal",
            tooltip="Select an attribute to be shown on the Y axis",
            callback=self.updateGraph, sendSelectedValue=1, valueType=str,
            labelWidth=70, contentsLength=12)

        gui.separator(self.controlArea)

        # GUI: condition (attribute = value) used to subset the data
        self.conditionGroup = gui.widgetBox(self.controlArea, box="Condition")
        self.attrConditionCombo = gui.comboBox(
            self.conditionGroup, self, value="attrCondition",
            label="Attribute:", orientation="horizontal",
            callback=self.updateConditionAttr, sendSelectedValue=True,
            valueType=str, labelWidth=70, contentsLength=12)
        self.attrConditionValueCombo = gui.comboBox(
            self.conditionGroup, self, value="attrConditionValue",
            label="Value:", orientation="horizontal",
            callback=self.updateGraph, sendSelectedValue=True,
            valueType=str, labelWidth=70, contentsLength=10)

        gui.separator(self.controlArea)

        # GUI: visual settings
        box2 = gui.widgetBox(self.controlArea, box="Visual settings")
        gui.checkBox(box2, self, "showLines",
                     "Show squares (observed frequency)",
                     callback=self.updateGraph)
        hbox = gui.widgetBox(box2, orientation="horizontal")
        gui.checkBox(hbox, self, "showCases", "Show data instances...",
                     callback=self.updateGraph)
        gui.checkBox(hbox, self, "showInColor", "...in color",
                     callback=self.updateGraph)

        gui.separator(self.controlArea)
        gui.rubber(self.controlArea)

        self.icons = gui.attributeIconDict
        self.resize(800, 550)
        random.seed()
        self.graphButton.clicked.connect(self.save_graph)

    def sendReport(self):
        """Report the selected attributes and the active condition."""
        self.startReport("%s [%s, %s]"
                         % (self.windowTitle(), self.attrX, self.attrY))
        # When no condition is selected the third entry is ``False`` rather
        # than a (label, text) pair; this is what the original expression
        # produced and is kept as is.
        condition_entry = (
            ("Condition",
             "%s = '%s'" % (self.attrCondition, self.attrConditionValue))
            if self.attrCondition != "(None)" else False)
        self.reportSettings("",
                            [("X-Attribute", self.attrX),
                             ("Y-Attribute", self.attrY),
                             condition_entry])

    def setData(self, data):
        """Receive new data and update all fields.

        Large SQL tables are replaced by a time-limited sample.  The combo
        boxes are re-initialized unless the new data has the same domain
        checksum as the previous data, so the attribute choice is preserved
        for an unchanged domain.
        """
        # isinstance (rather than an exact type comparison) also covers
        # subclasses of SqlTable.
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.information(0)
        self.information(1)
        sameDomain = (self.data and data and
                      self.data.domain.checksum() == data.domain.checksum())
        self.data = data

        if not sameDomain:
            self.initCombos()

        self.warning(0, "")
        if data:
            if any(attr.is_continuous for attr in data.domain):
                self.warning(0, "Data contains continuous variables. " +
                             "Discretize the data to use them.")

        self.setShownAttributes(self.attributeSelectionList)

    def setShownAttributes(self, attrList):
        """Attribute selection signal.

        If ``attrList`` holds at least two names that both occur in the
        domain of the current data, the first two become the X and Y
        attributes.  The graph is redrawn in every case.
        """
        self.attributeSelectionList = attrList
        if self.data and self.attributeSelectionList and len(attrList) >= 2:
            attrs = [attr.name for attr in self.data.domain]
            if attrList[0] in attrs and attrList[1] in attrs:
                self.attrX = attrList[0]
                self.attrY = attrList[1]
        self.updateGraph()

    def getConditionalData(self, xAttr=None, yAttr=None, dropMissingData=1):
        """Create the data subset for the conditional attribute and value.

        ``xAttr`` / ``yAttr`` default to the currently selected attributes.
        Without a condition the result contains only the two columns; with
        a condition the rows matching ``attrCondition == attrConditionValue``
        are returned.  Returns ``None`` when there is no data or an
        attribute is missing.  ``dropMissingData`` is accepted for
        compatibility but not used.
        """
        if not self.data:
            return None

        if not xAttr:
            xAttr = self.attrX
        if not yAttr:
            yAttr = self.attrY
        if not (xAttr and yAttr):
            return None

        if self.attrCondition == "(None)":
            return self.data[:, [xAttr, yAttr]]

        fd = Orange.data.filter.FilterDiscrete(
            column=self.attrCondition, values=[self.attrConditionValue])
        filt = Orange.data.filter.Values([fd])
        filt.domain = self.data.domain
        return filt(self.data)

    def updateConditionAttr(self):
        """A new conditional attribute was set - refill values and redraw."""
        self.attrConditionValueCombo.clear()

        # Guard against being called before any data (or any condition
        # attribute) is available; the domain lookup below would fail.
        has_condition = self.attrCondition not in ("", "(None)")
        if self.data is not None and has_condition:
            for val in self.data.domain[self.attrCondition].values:
                self.attrConditionValueCombo.addItem(val)
            self.attrConditionValue = str(
                self.attrConditionValueCombo.itemText(0))
        self.updateGraph()

    def initCombos(self):
        """Fill the attribute combo boxes from the current data.

        Only discrete variables are listed.  X defaults to the first listed
        variable and Y to the second one (or the first, if there is only
        one).  The condition defaults to "(None)".
        """
        self.attrXCombo.clear()
        self.attrYCombo.clear()
        self.attrConditionCombo.clear()
        self.attrConditionCombo.addItem("(None)")
        self.attrConditionValueCombo.clear()

        if not self.data:
            return

        combos = (self.attrXCombo, self.attrYCombo, self.attrConditionCombo)
        for var in self.data.domain:
            if var.is_discrete:
                for combo in combos:
                    combo.addItem(self.icons[var], var.name)
        self.attrCondition = str(self.attrConditionCombo.itemText(0))

        if self.attrXCombo.count() > 0:
            self.attrX = str(self.attrXCombo.itemText(0))
            # index 1 when a second attribute exists, otherwise index 0
            y_index = 1 if self.attrYCombo.count() > 1 else 0
            self.attrY = str(self.attrYCombo.itemText(y_index))
        else:
            self.attrX = None
            self.attrY = None

    def resizeEvent(self, e):
        """Redraw the diagram after the widget has been resized."""
        OWWidget.resizeEvent(self, e)
        self.updateGraph()

    def showEvent(self, ev):
        """Redraw the diagram when the widget is shown."""
        OWWidget.showEvent(self, ev)
        self.updateGraph()

    @staticmethod
    def _marginal_counts(contingency):
        """Return a list with the sum of each entry of ``contingency``."""
        counts = []
        for entry in contingency:
            total = 0
            try:
                for val in entry:
                    total += val
            except TypeError:
                # Best effort: an entry that cannot be summed keeps whatever
                # was accumulated so far (0 if it is not iterable at all).
                pass
            counts.append(total)
        return counts

    def _cell_tooltip(self, x_value, x_count, y_value, y_count, actual,
                      total, expected, chisquare, pearson):
        """Build the HTML tooltip describing one cell of the diagram."""
        template = (
            "<b>X Attribute: %s</b><br>Value: <b>%s</b><br>"
            "Number of instances (p(x)): <b>%d (%.2f%%)</b><hr>\n"
            "<b>Y Attribute: %s</b><br>Value: <b>%s</b><br>"
            "Number of instances (p(y)): <b>%d (%.2f%%)</b><hr>\n"
            "<b>Number Of Instances (Probabilities):</b><br>"
            "Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>"
            "Actual (p(x,y)): <b>%d (%.2f%%)</b>\n"
            "<hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>"
            "Standardized Pearson residual: <b>%.2f</b>")
        return template % (
            self.attrX, getHtmlCompatibleString(x_value), x_count,
            100.0 * float(x_count) / float(total),
            self.attrY, getHtmlCompatibleString(y_value), y_count,
            100.0 * float(y_count) / float(total),
            expected, 100.0 * float(x_count * y_count) / float(total * total),
            actual, 100.0 * float(actual) / float(total),
            chisquare, pearson)

    def updateGraph(self, *args):
        """Redraw the whole diagram; called whenever the graph must change.

        Clears the canvas, then (if data and both attributes are available)
        computes marginal and joint counts, the chi-square statistic, and
        draws one rectangle per (X value, Y value) pair together with the
        value labels, attribute names and the title.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)    # remove all canvas items
        if not self.data:
            return
        if not self.attrX or not self.attrY:
            return

        data = self.getConditionalData()
        if not data or len(data) == 0:
            return

        x_var = data.domain[self.attrX]
        y_var = data.domain[self.attrY]
        n_instances = len(data)     # hoisted: evaluated once, not per cell

        # marginal counts of the X and Y attribute
        valsX = self._marginal_counts(
            get_contingency(data, self.attrX, self.attrX))
        valsY = self._marginal_counts(
            get_contingency(data, self.attrY, self.attrY))

        # joint counts, keyed by "<x value>-<y value>"
        contXY = self.getConditionalDistributions(data, [x_var, y_var])

        # per-cell record: ((x value, x count), (y value, y count),
        #                   observed count, number of instances)
        probs = {}
        for i, x_count in enumerate(valsX):
            for j, y_count in enumerate(valsY):
                x_value = x_var.values[i]
                y_value = y_var.values[j]
                actual = contXY.get('%s-%s' % (x_value, y_value), 0)
                probs[i, j] = ((x_value, x_count), (y_value, y_count),
                               actual, n_instances)

        # get text width of Y labels
        max_ylabel_w = 0
        for j in range(len(valsY)):
            xl = OWCanvasText(
                self.canvas, "", 0, 0,
                htmlText=getHtmlCompatibleString(y_var.values[j]),
                show=False)
            max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
        max_ylabel_w = min(max_ylabel_w, 200)   # upper limit for label widths

        # get text width of Y attribute name
        text = OWCanvasText(self.canvas, y_var.name, x=0, y=0, bold=1,
                            show=0, vertical=True)
        xOff = int(text.boundingRect().height() + max_ylabel_w)
        yOff = 55
        squareSize = min(self.canvasView.width() - xOff - 35,
                         self.canvasView.height() - yOff - 50)
        if squareSize < 0:
            return      # canvas is too small to draw rectangles
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        if self.attrCondition == "(None)":
            name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % (
                self.attrX, self.attrY, self.attrX, self.attrY)
        else:
            cond_value = getHtmlCompatibleString(self.attrConditionValue)
            name = ("<b>P(%s, %s | %s = %s) ≠ "
                    "P(%s | %s = %s)×P(%s | %s = %s)</b>") % (
                self.attrX, self.attrY, self.attrCondition, cond_value,
                self.attrX, self.attrCondition, cond_value,
                self.attrY, self.attrCondition, cond_value)
        OWCanvasText(self.canvas, "", xOff + squareSize / 2, 20,
                     Qt.AlignCenter, htmlText=name)
        OWCanvasText(self.canvas, "N = " + str(n_instances),
                     xOff + squareSize / 2, 38, Qt.AlignCenter, bold=0)

        # compute chi-square
        chisquare = 0.0
        for i in range(len(valsX)):
            for j in range(len(valsY)):
                (_, xVal), (_, yVal), actual, total = probs[i, j]
                expected = float(xVal * yVal) / float(total)
                if expected == 0:
                    continue
                chisquare += (actual - expected) * (actual - expected) \
                    / expected

        # draw rectangles
        currX = xOff
        max_xlabel_h = 0

        normX, normY = sum(valsX), sum(valsY)
        for i in range(len(valsX)):
            if valsX[i] == 0:
                continue
            currY = yOff
            width = int(float(squareSize * valsX[i]) / float(normX))

            # iterate Y values in reverse so they are sorted correctly
            for j in range(len(valsY) - 1, -1, -1):
                if valsY[j] == 0:
                    continue
                (x_value, xVal), (y_value, yVal), actual, total = probs[i, j]
                height = int(float(squareSize * valsY[j]) / float(normY))

                # create rectangle
                rect = OWCanvasRectangle(self.canvas, currX + 2, currY + 2,
                                         width - 4, height - 4, z=-10)
                self.addRectIndependencePearson(
                    rect, currX + 2, currY + 2, width - 4, height - 4,
                    (x_value, xVal), (y_value, yVal), actual, total)

                # both marginals are non-zero here, so expected > 0
                expected = float(xVal * yVal) / float(total)
                pearson = (actual - expected) / sqrt(expected)
                rect.setToolTip(self._cell_tooltip(
                    x_value, xVal, y_value, yVal, actual, total,
                    expected, chisquare, pearson))

                currY += height
                if currX == xOff:
                    # Y value labels are drawn next to the first column only
                    OWCanvasText(
                        self.canvas, "", xOff, currY - height / 2,
                        Qt.AlignRight | Qt.AlignVCenter,
                        htmlText=getHtmlCompatibleString(y_var.values[j]))

            xl = OWCanvasText(
                self.canvas, "", currX + width / 2, yOff + squareSize,
                Qt.AlignHCenter | Qt.AlignTop,
                htmlText=getHtmlCompatibleString(x_var.values[i]))
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)

            currX += width

        # show attribute names
        OWCanvasText(self.canvas, self.attrY, 0, yOff + squareSize / 2,
                     Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True)
        OWCanvasText(self.canvas, self.attrX, xOff + squareSize / 2,
                     yOff + squareSize + max_xlabel_h,
                     Qt.AlignHCenter | Qt.AlignTop, bold=1)

    def getConditionalDistributions(self, data, attrs):
        """Count instances for combinations of attribute values.

        Returns a ``defaultdict(int)`` mapping "v1-v2-..." (values joined
        with "-") to the number of instances with that combination.  The
        combinations are built for every prefix of ``attrs`` followed by
        the class variable, if the domain has one.  For SQL tables the
        counts come from a ``GROUP BY`` query, otherwise from filtering
        ``data`` once per combination.
        """
        cond_dist = defaultdict(int)
        all_attrs = [data.domain[a] for a in attrs]
        if data.domain.class_var is not None:
            all_attrs.append(data.domain.class_var)

        for i in range(1, len(all_attrs) + 1):
            prefix = all_attrs[:i]
            if isinstance(data, SqlTable):
                # make all possible pairs of attributes + class_var
                sql_fields = [a.to_sql() for a in prefix]
                query = data._sql_query(sql_fields + ["COUNT(*)"],
                                        group_by=sql_fields)
                with data._execute_sql_query(query) as cur:
                    res = cur.fetchall()
                for r in res:
                    str_values = [a.repr_val(a.to_val(x))
                                  for a, x in zip(all_attrs, r[:-1])]
                    # unknown values are keyed as 'None' instead of '?'
                    str_values = [x if x != '?' else 'None'
                                  for x in str_values]
                    cond_dist['-'.join(str_values)] = r[-1]
            else:
                for vals in product(*(a.values for a in prefix)):
                    conditions = [
                        Orange.data.filter.FilterDiscrete(column=a,
                                                          values=[v])
                        for a, v in zip(prefix, vals)]
                    filt = Orange.data.filter.Values(conditions)
                    cond_dist['-'.join(vals)] = len(filt(data))
        return cond_dist

    def addRectIndependencePearson(self, rect, x, y, w, h, xAttr_xVal,
                                   yAttr_yVal, actual, sum):
        """Style ``rect`` by its standardized Pearson residual.

        Shows deviation from attribute independence: the rectangle is
        filled blue (more instances than expected), red (fewer) or white
        (as expected), optionally dotted with one ellipse per instance, and
        overlaid with grid lines via :meth:`addLines`.

        ``xAttr_xVal`` / ``yAttr_yVal`` are (value, count) pairs, ``actual``
        is the observed count and ``sum`` the total number of instances
        (the parameter name shadows the builtin; it is kept for callers).
        """
        xAttr, xVal = xAttr_xVal
        yAttr, yVal = yAttr_yVal
        expected = float(xVal * yVal) / float(sum)
        # A zero expectation would divide by zero; treat it as no deviation.
        pearson = (actual - expected) / sqrt(expected) if expected > 0 else 0.0

        if pearson > 0:
            # more examples than we would expect under the null hypothesis
            intPearson = floor(pearson)
            pen = QPen(QColor(0, 0, 255), 1)
            rect.setPen(pen)
            b = 255
            r = g = max(255 - intPearson * 20, 55)
        elif pearson < 0:
            intPearson = ceil(pearson)
            pen = QPen(QColor(255, 0, 0), 1)
            rect.setPen(pen)
            r = 255
            b = g = max(255 + intPearson * 20, 55)
        else:
            # this pen is only used for the grid lines, not set on the rect
            pen = QPen(QColor(255, 255, 255), 1)
            r = g = b = 255     # white
        rect.setBrush(QBrush(QColor(r, g, b)))

        if self.showCases and w > 6 and h > 6:
            if self.showInColor:
                c = QColor(0, 0, 255) if pearson > 0 else QColor(255, 0, 0)
            else:
                c = Qt.black
            for _ in range(int(actual)):
                OWCanvasEllipse(self.canvas,
                                random.randint(x + 1, x + w - 4),
                                random.randint(y + 1, y + h - 4),
                                3, 3, penColor=c, brushColor=c, z=100)

        if pearson > 0:
            pearson = min(pearson, 10)
            # if pearson in [0..10] --> kvoc in [1..0.2]
            kvoc = 1 - 0.08 * pearson
        else:
            pearson = max(pearson, -10)
            kvoc = 1 - 0.4 * pearson

        self.addLines(x, y, w, h, kvoc, pen)

    def addLines(self, x, y, w, h, diff, pen):
        """Draw a grid of lines inside the rectangle (x, y, w, h).

        The spacing between lines is 20 pixels scaled by ``diff``; lines
        use the colour of ``pen``.  Nothing is drawn when lines are turned
        off, the rectangle is empty or the spacing is not positive.
        """
        if not self.showLines:
            return
        if w == 0 or h == 0:
            return

        dist = 20 * diff    # 20 = original distance between lines in pixels
        if dist <= 0:
            # a non-positive step would never reach w or h (endless loop)
            return

        offset = dist
        while offset < w:
            OWCanvasLine(self.canvas, offset + x, y, offset + x, y + h, 1,
                         pen.color())
            offset += dist

        offset = dist
        while offset < h:
            OWCanvasLine(self.canvas, x, y + offset, x + w, y + offset, 1,
                         pen.color())
            offset += dist

    def saveToFileCanvas(self):
        """Open the image-size dialog for the canvas."""
        # NOTE(review): OWChooseImageSizeDlg is not among the imports visible
        # in this file - confirm where it comes from before calling this.
        sizeDlg = OWChooseImageSizeDlg(self.canvas, parent=self)
        sizeDlg.exec_()

    def closeEvent(self, ce):
        """Forward the close event to the base widget."""
        # Delegate to OWWidget, as resizeEvent and showEvent do, instead of
        # calling QDialog.closeEvent directly.
        OWWidget.closeEvent(self, ce)

    def save_graph(self):
        """Open the save dialog offering the image writers for the canvas."""
        from Orange.widgets.data.owsave import OWSave

        save_img = OWSave(parent=self, data=self.canvas,
                          file_formats=FileFormats.img_writers)
        save_img.exec_()
|
464
|
|
|
|
|
465
|
|
|
# class OWSieveOptimization(OWMosaicOptimization, orngMosaic): |
|
466
|
|
|
# settingsList = ["percentDataUsed", "ignoreTooSmallCells", |
|
467
|
|
|
# "timeLimit", "useTimeLimit", "lastSaveDirName", "projectionLimit", "useProjectionLimit"] |
|
468
|
|
|
# |
|
469
|
|
|
# def __init__(self, visualizationWidget = None, signalManager = None): |
|
470
|
|
|
# OWWidget.__init__(self, None, signalManager, "Sieve Evaluation Dialog", savePosition = True, wantMainArea = 0, wantStatusBar = 1) |
|
|
|
|
|
|
471
|
|
|
# orngMosaic.__init__(self) |
|
472
|
|
|
# |
|
473
|
|
|
# self.resize(390,620) |
|
474
|
|
|
# self.setCaption("Sieve Diagram Evaluation Dialog") |
|
475
|
|
|
# |
|
476
|
|
|
# loaded variables |
|
477
|
|
|
# self.visualizationWidget = visualizationWidget |
|
478
|
|
|
# self.useTimeLimit = 0 |
|
479
|
|
|
# self.useProjectionLimit = 0 |
|
480
|
|
|
# self.qualityMeasure = CHI_SQUARE # we will always compute only chi square with sieve diagram |
|
481
|
|
|
# self.optimizationType = EXACT_NUMBER_OF_ATTRS |
|
482
|
|
|
# self.attributeCount = 2 |
|
483
|
|
|
# self.attrCondition = None |
|
484
|
|
|
# self.attrConditionValue = None |
|
485
|
|
|
# |
|
486
|
|
|
# self.lastSaveDirName = os.getcwd() |
|
487
|
|
|
# |
|
488
|
|
|
# self.attrLenDict = {} |
|
489
|
|
|
# self.shownResults = [] |
|
490
|
|
|
# self.loadSettings() |
|
491
|
|
|
# |
|
492
|
|
|
# self.layout().setMargin(0) |
|
493
|
|
|
# self.tabs = gui.tabWidget(self.controlArea) |
|
494
|
|
|
# self.MainTab = gui.createTabPage(self.tabs, "Main") |
|
495
|
|
|
# self.SettingsTab = gui.createTabPage(self.tabs, "Settings") |
|
496
|
|
|
# self.ManageTab = gui.createTabPage(self.tabs, "Manage") |
|
497
|
|
|
# |
|
498
|
|
|
########################### |
|
499
|
|
|
# MAIN TAB |
|
500
|
|
|
# box = gui.widgetBox(self.MainTab, box = "Condition") |
|
501
|
|
|
# self.attrConditionCombo = gui.comboBoxWithCaption(box, self, "attrCondition", "Attribute:", callback = self.updateConditionAttr, sendSelectedValue = 1, valueType = str, labelWidth = 70) |
|
|
|
|
|
|
502
|
|
|
# self.attrConditionValueCombo = gui.comboBoxWithCaption(box, self, "attrConditionValue", "Value:", sendSelectedValue = 1, valueType = str, labelWidth = 70) |
|
|
|
|
|
|
503
|
|
|
# |
|
504
|
|
|
# self.optimizationBox = gui.widgetBox(self.MainTab, "Evaluate") |
|
505
|
|
|
# self.buttonBox = gui.widgetBox(self.optimizationBox, orientation = "horizontal") |
|
506
|
|
|
# self.resultsBox = gui.widgetBox(self.MainTab, "Projection List Ordered by Chi-Square") |
|
507
|
|
|
# |
|
508
|
|
|
# self.label1 = gui.widgetLabel(self.buttonBox, 'Projections with ') |
|
509
|
|
|
# self.optimizationTypeCombo = gui.comboBox(self.buttonBox, self, "optimizationType", items = [" exactly ", " maximum "] ) |
|
|
|
|
|
|
510
|
|
|
# self.attributeCountCombo = gui.comboBox(self.buttonBox, self, "attributeCount", items = range(1, 5), tooltip = "Evaluate only projections with exactly (or maximum) this number of attributes", sendSelectedValue = 1, valueType = int) |
|
|
|
|
|
|
511
|
|
|
# self.attributeLabel = gui.widgetLabel(self.buttonBox, ' attributes') |
|
512
|
|
|
# |
|
513
|
|
|
# self.startOptimizationButton = gui.button(self.optimizationBox, self, "Start Evaluating Projections", callback = self.evaluateProjections) |
|
|
|
|
|
|
514
|
|
|
# f = self.startOptimizationButton.font(); f.setBold(1); self.startOptimizationButton.setFont(f) |
|
515
|
|
|
# self.stopOptimizationButton = gui.button(self.optimizationBox, self, "Stop Evaluation", callback = self.stopEvaluationClick) |
|
|
|
|
|
|
516
|
|
|
# self.stopOptimizationButton.setFont(f) |
|
517
|
|
|
# self.stopOptimizationButton.hide() |
|
518
|
|
|
# |
|
519
|
|
|
# self.resultList = gui.listBox(self.resultsBox, self, callback = self.showSelectedAttributes) |
|
520
|
|
|
# self.resultList.setMinimumHeight(200) |
|
521
|
|
|
# |
|
522
|
|
|
########################## |
|
523
|
|
|
# SETTINGS TAB |
|
524
|
|
|
# gui.checkBox(self.SettingsTab, self, "ignoreTooSmallCells", "Ignore cells where expected number of cases is less than 5", box = "Ignore small cells", tooltip = "Statisticians advise that in cases when the number of expected examples is less than 5 we ignore the cell \nsince it can significantly influence the chi-square value.") |
|
|
|
|
|
|
525
|
|
|
# |
|
526
|
|
|
# gui.comboBoxWithCaption(self.SettingsTab, self, "percentDataUsed", "Percent of data used: ", box = "Data settings", items = self.percentDataNums, sendSelectedValue = 1, valueType = int, tooltip = "In case that we have a large dataset the evaluation of each projection can take a lot of time.\nWe can therefore use only a subset of randomly selected examples, evaluate projection on them and thus make evaluation faster.") |
|
|
|
|
|
|
527
|
|
|
# |
|
528
|
|
|
# self.stopOptimizationBox = gui.widgetBox(self.SettingsTab, "When to Stop Evaluation or Optimization?") |
|
529
|
|
|
# gui.checkWithSpin(self.stopOptimizationBox, self, "Time limit: ", 1, 1000, "useTimeLimit", "timeLimit", " (minutes)", debuggingEnabled = 0) # disable debugging. we always set this to 1 minute |
|
|
|
|
|
|
530
|
|
|
# gui.checkWithSpin(self.stopOptimizationBox, self, "Use projection count limit: ", 1, 1000000, "useProjectionLimit", "projectionLimit", " (projections)", debuggingEnabled = 0) |
|
|
|
|
|
|
531
|
|
|
# gui.rubber(self.SettingsTab) |
|
532
|
|
|
# |
|
533
|
|
|
########################## |
|
534
|
|
|
# SAVE TAB |
|
535
|
|
|
# self.visualizedAttributesBox = gui.widgetBox(self.ManageTab, "Number of Concurrently Visualized Attributes") |
|
536
|
|
|
# self.dialogsBox = gui.widgetBox(self.ManageTab, "Dialogs") |
|
537
|
|
|
# self.manageResultsBox = gui.widgetBox(self.ManageTab, "Manage projections") |
|
538
|
|
|
# |
|
539
|
|
|
# self.attrLenList = gui.listBox(self.visualizedAttributesBox, self, selectionMode = QListWidget.MultiSelection, callback = self.attrLenListChanged) |
|
|
|
|
|
|
540
|
|
|
# self.attrLenList.setMinimumHeight(60) |
|
541
|
|
|
# |
|
542
|
|
|
# self.buttonBox7 = gui.widgetBox(self.dialogsBox, orientation = "horizontal") |
|
543
|
|
|
# gui.button(self.buttonBox7, self, "Attribute Ranking", self.attributeAnalysis, debuggingEnabled = 0) |
|
544
|
|
|
# gui.button(self.buttonBox7, self, "Graph Projection Scores", self.graphProjectionQuality, debuggingEnabled = 0) |
|
|
|
|
|
|
545
|
|
|
# |
|
546
|
|
|
# hbox = gui.widgetBox(self.manageResultsBox, orientation = "horizontal") |
|
547
|
|
|
# gui.button(hbox, self, "Load", self.load, debuggingEnabled = 0) |
|
548
|
|
|
# gui.button(hbox, self, "Save", self.save, debuggingEnabled = 0) |
|
549
|
|
|
# |
|
550
|
|
|
# hbox = gui.widgetBox(self.manageResultsBox, orientation = "horizontal") |
|
551
|
|
|
# gui.button(hbox, self, "Clear results", self.clearResults) |
|
552
|
|
|
# gui.rubber(self.ManageTab) |
|
553
|
|
|
# |
|
554
|
|
|
# reset some parameters if we are debugging so that it won't take too much time |
|
555
|
|
|
# if orngDebugging.orngDebuggingEnabled: |
|
556
|
|
|
# self.useTimeLimit = 1 |
|
557
|
|
|
# self.timeLimit = 0.3 |
|
558
|
|
|
# self.useProjectionLimit = 1 |
|
559
|
|
|
# self.projectionLimit = 100 |
|
560
|
|
|
# self.icons = self.createAttributeIconDict() |
|
561
|
|
|
# |
|
562
|
|
|
# |
|
563
|
|
|
# when we start evaluating projections save info on the condition - this has to be stored in the |
|
564
|
|
|
# def evaluateProjections(self): |
|
565
|
|
|
# if not self.data: return |
|
566
|
|
|
# self.usedAttrCondition = self.attrCondition |
|
567
|
|
|
# self.usedAttrConditionValue = self.attrConditionValue |
|
568
|
|
|
# self.wholeDataSet = self.data # we have to create a datasubset based on the attrCondition |
|
569
|
|
|
# if self.attrCondition != "(None)": |
|
570
|
|
|
# self.data = self.data.select({self.attrCondition : self.attrConditionValue}) |
|
571
|
|
|
# orngMosaic.setData(self, self.data) |
|
572
|
|
|
# OWMosaicOptimization.evaluateProjections(self) |
|
573
|
|
|
# |
|
574
|
|
|
# this is a handler that is called after we finish evaluating projections (when evaluated all projections, or stop was pressed) |
|
|
|
|
|
|
575
|
|
|
# def finishEvaluation(self, evaluatedProjections): |
|
576
|
|
|
# self.data = self.wholeDataSet # restore the whole data after projection evaluation |
|
577
|
|
|
# OWMosaicOptimization.finishEvaluation(self, evaluatedProjections) |
|
578
|
|
|
# |
|
579
|
|
|
# |
|
580
|
|
|
# def showSelectedAttributes(self, attrs = None): |
|
581
|
|
|
# if not self.visualizationWidget: return |
|
582
|
|
|
# if not attrs: |
|
583
|
|
|
# projection = self.getSelectedProjection() |
|
584
|
|
|
# if not projection: return |
|
585
|
|
|
# self.visualizationWidget.attrCondition = self.usedAttrCondition |
|
586
|
|
|
# self.visualizationWidget.updateConditionAttr() |
|
587
|
|
|
# self.visualizationWidget.attrConditionValue = self.usedAttrConditionValue |
|
588
|
|
|
# (score, attrs, index, extraInfo) = projection |
|
589
|
|
|
# |
|
590
|
|
|
# self.resultList.setFocus() |
|
591
|
|
|
# self.visualizationWidget.setShownAttributes(attrs) |
|
592
|
|
|
# |
|
593
|
|
|
# |
|
594
|
|
|
# def clearResults(self): |
|
595
|
|
|
# orngMosaic.clearResults(self) |
|
596
|
|
|
# self.resultList.clear() |
|
597
|
|
|
# |
|
598
|
|
|
# def setData(self, data, removeUnusedValues = 0): |
|
599
|
|
|
# self.attrConditionCombo.clear() |
|
600
|
|
|
# self.attrConditionCombo.addItem("(None)") |
|
601
|
|
|
# self.attrConditionValueCombo.clear() |
|
602
|
|
|
# self.resultList.clear() |
|
603
|
|
|
# |
|
604
|
|
|
# orngMosaic.setData(self, data, removeUnusedValues) |
|
605
|
|
|
# |
|
606
|
|
|
# self.setStatusBarText("") |
|
607
|
|
|
# if not self.data: return None |
|
608
|
|
|
# |
|
609
|
|
|
# for i in range(len(self.data.domain)): |
|
610
|
|
|
# self.attrConditionCombo.addItem(self.icons[self.data.domain[i].varType], self.data.domain[i].name) |
|
611
|
|
|
# self.attrCondition = str(self.attrConditionCombo.itemText(0)) |
|
612
|
|
|
# |
|
613
|
|
|
# return self.data |
|
614
|
|
|
# |
|
615
|
|
|
# def finishedAddingResults(self): |
|
616
|
|
|
# self.resultList.setCurrentItem(self.resultList.item(0)) |
|
617
|
|
|
# |
|
618
|
|
|
# def updateConditionAttr(self): |
|
619
|
|
|
# self.attrConditionValueCombo.clear() |
|
620
|
|
|
# |
|
621
|
|
|
# if self.attrCondition != "(None)": |
|
622
|
|
|
# for val in self.data.domain[self.attrCondition].values: |
|
623
|
|
|
# self.attrConditionValueCombo.addItem(val) |
|
624
|
|
|
# self.attrConditionValue = str(self.attrConditionValueCombo.itemText(0)) |
|
625
|
|
|
|
|
626
|
|
|
|
|
627
|
|
|
# test widget appearance |
|
628
|
|
|
# Manual check of the widget: show it with the "zoo.tab" table loaded.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    widget = OWSieveDiagram()
    widget.show()
    widget.setData(Table(r"zoo.tab"))
    app.exec_()
    widget.saveSettings()
|
636
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files
in your module folders. Make sure that you place one file in each sub-folder.