1
|
|
|
from collections import defaultdict |
2
|
|
|
from itertools import product |
3
|
|
|
from math import sqrt, floor, ceil |
4
|
|
|
import random |
5
|
|
|
import sys |
6
|
|
|
|
7
|
|
|
from PyQt4.QtCore import Qt |
|
|
|
|
8
|
|
|
from PyQt4.QtGui import (QGraphicsScene, QGraphicsView, QColor, QPen, QBrush, |
|
|
|
|
9
|
|
|
QDialog, QApplication) |
10
|
|
|
|
11
|
|
|
|
12
|
|
|
import Orange |
13
|
|
|
from Orange.data import Table |
14
|
|
|
from Orange.data.sql.table import SqlTable, LARGE_TABLE, DEFAULT_SAMPLE_TIME |
15
|
|
|
from Orange.statistics.contingency import get_contingency |
16
|
|
|
from Orange.widgets import gui |
17
|
|
|
from Orange.widgets.utils import getHtmlCompatibleString |
18
|
|
|
from Orange.widgets.visualize.owmosaic import (OWCanvasText, OWCanvasRectangle, |
19
|
|
|
OWCanvasEllipse, OWCanvasLine) |
20
|
|
|
from Orange.widgets.widget import OWWidget, Default, AttributeList |
21
|
|
|
from Orange.widgets.io import FileFormats |
22
|
|
|
|
23
|
|
|
|
24
|
|
|
class OWSieveDiagram(OWWidget):
    """Sieve diagram widget for two discrete attributes.

    The widget draws one rectangle per (X value, Y value) pair.  Rectangle
    width and height are proportional to the marginal counts of the X and Y
    values; fill colour and grid-line density encode the standardized Pearson
    residual between the observed count and the count expected under
    independence.  Optionally the data can first be restricted to instances
    with a given value of a third ("condition") attribute.
    """

    name = "Sieve Diagram"
    description = ("A two-way contingency table providing information in "
                   "relation to expected frequency of combination of feature "
                   "values under independence.")
    icon = "icons/SieveDiagram.svg"
    priority = 4200

    inputs = [("Data", Table, "setData", Default),
              ("Features", AttributeList, "setShownAttributes")]
    outputs = []

    settingsList = ["showLines", "showCases", "showInColor"]

    want_graph = True

    def __init__(self, parent=None, signalManager=None):
        """Build the canvas and the control-area GUI.

        NOTE: the VizRank optimization dialog (``OWSieveOptimization``) is
        currently disabled; its code is kept commented out at module level.
        """
        OWWidget.__init__(self, parent, signalManager, "Sieve diagram", True)

        # default settings
        self.data = None

        self.attrX = ""
        self.attrY = ""
        self.attrCondition = ""
        self.attrConditionValue = ""
        self.showLines = 1
        self.showCases = 0
        self.showInColor = 1
        self.attributeSelectionList = None
        self.stopCalculating = 0

        self.canvas = QGraphicsScene()
        self.canvasView = QGraphicsView(self.canvas, self.mainArea)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

        # GUI
        self.attrSelGroup = gui.widgetBox(self.controlArea, box="Shown attributes")

        self.attrXCombo = gui.comboBox(
            self.attrSelGroup, self, value="attrX", label="X attribute:",
            orientation="horizontal", tooltip="Select an attribute to be shown on the X axis",
            callback=self.updateGraph, sendSelectedValue=1, valueType=str,
            labelWidth=70, contentsLength=12)

        self.attrYCombo = gui.comboBox(
            self.attrSelGroup, self, value="attrY", label="Y attribute:",
            orientation="horizontal", tooltip="Select an attribute to be shown on the Y axis",
            callback=self.updateGraph, sendSelectedValue=1, valueType=str,
            labelWidth=70, contentsLength=12)

        gui.separator(self.controlArea)

        self.conditionGroup = gui.widgetBox(self.controlArea, box="Condition")
        self.attrConditionCombo = gui.comboBox(
            self.conditionGroup, self, value="attrCondition",
            label="Attribute:", orientation="horizontal",
            callback=self.updateConditionAttr, sendSelectedValue=True,
            valueType=str, labelWidth=70, contentsLength=12)
        self.attrConditionValueCombo = gui.comboBox(
            self.conditionGroup, self, value="attrConditionValue",
            label="Value:", orientation="horizontal", callback=self.updateGraph,
            sendSelectedValue=True, valueType=str, labelWidth=70,
            contentsLength=10)

        gui.separator(self.controlArea)

        box2 = gui.widgetBox(self.controlArea, box="Visual settings")
        gui.checkBox(box2, self, "showLines", "Show squares (observed frequency)",
                     callback=self.updateGraph)
        hbox = gui.widgetBox(box2, orientation="horizontal")
        gui.checkBox(hbox, self, "showCases", "Show data instances...",
                     callback=self.updateGraph)
        gui.checkBox(hbox, self, "showInColor", "...in color",
                     callback=self.updateGraph)

        gui.separator(self.controlArea)
        gui.rubber(self.controlArea)

        self.icons = gui.attributeIconDict
        self.resize(800, 550)
        random.seed()
        self.graphButton.clicked.connect(self.save_graph)

    def sendReport(self):
        """Start a report section listing the shown attributes and condition."""
        self.startReport("%s [%s, %s]" % (self.windowTitle(), self.attrX, self.attrY))
        settings = [("X-Attribute", self.attrX), ("Y-Attribute", self.attrY)]
        # Only report the condition when one is selected; the original code
        # put a literal ``False`` into the list in the "(None)" case.
        if self.attrCondition != "(None)":
            settings.append(
                ("Condition", "%s = '%s'" % (self.attrCondition, self.attrConditionValue)))
        self.reportSettings("", settings)

    def setData(self, data):
        """Receive new data and update the combos, warnings and the diagram.

        Large ``SqlTable`` inputs are replaced by a time-limited sample.  The
        attribute choice is preserved when the new domain has the same
        checksum as the previous one.
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.information(0)
        self.information(1)
        # preserve attribute choice if the domain is the same
        sameDomain = (self.data and data and
                      self.data.domain.checksum() == data.domain.checksum())
        self.data = data

        if not sameDomain:
            self.initCombos()

        self.warning(0, "")
        if data:
            if any(attr.is_continuous for attr in data.domain):
                self.warning(0, "Data contains continuous variables. " +
                             "Discretize the data to use them.")

        self.setShownAttributes(self.attributeSelectionList)

    def setShownAttributes(self, attrList):
        """Attribute selection signal: show the first two named attributes.

        The list is remembered; X and Y are only changed when data is present
        and both names exist in its domain.  The diagram is always redrawn.
        """
        self.attributeSelectionList = attrList
        if self.data and self.attributeSelectionList and len(attrList) >= 2:
            attrs = [attr.name for attr in self.data.domain]
            if attrList[0] in attrs and attrList[1] in attrs:
                self.attrX = attrList[0]
                self.attrY = attrList[1]
        self.updateGraph()

    def getConditionalData(self, xAttr=None, yAttr=None, dropMissingData=1):
        """Return the data subset selected by the condition attribute/value.

        :param xAttr: X attribute name; defaults to ``self.attrX``.
        :param yAttr: Y attribute name; defaults to ``self.attrY``.
        :param dropMissingData: currently unused, kept for compatibility.
        :return: ``None`` when there is no data or an attribute is missing;
            the two selected columns when the condition is "(None)";
            otherwise the rows whose condition attribute has the chosen value.
        """
        if not self.data:
            return None

        if not xAttr:
            xAttr = self.attrX
        if not yAttr:
            yAttr = self.attrY
        if not (xAttr and yAttr):
            return None

        if self.attrCondition == "(None)":
            return self.data[:, [xAttr, yAttr]]

        fd = Orange.data.filter.FilterDiscrete(
            column=self.attrCondition, values=[self.attrConditionValue])
        filt = Orange.data.filter.Values([fd])
        filt.domain = self.data.domain
        return filt(self.data)

    def updateConditionAttr(self):
        """A new condition attribute was chosen: refill its values and redraw."""
        self.attrConditionValueCombo.clear()

        # Guard against a missing data set before touching its domain.
        if self.data is not None and self.attrCondition != "(None)":
            for val in self.data.domain[self.attrCondition].values:
                self.attrConditionValueCombo.addItem(val)
            self.attrConditionValue = str(self.attrConditionValueCombo.itemText(0))
        self.updateGraph()

    def initCombos(self):
        """Repopulate the X, Y and condition combos with discrete variables."""
        self.attrXCombo.clear()
        self.attrYCombo.clear()
        self.attrConditionCombo.clear()
        self.attrConditionCombo.addItem("(None)")
        self.attrConditionValueCombo.clear()

        if not self.data:
            return
        for var in self.data.domain:
            if var.is_discrete:
                icon = self.icons[var]
                self.attrXCombo.addItem(icon, var.name)
                self.attrYCombo.addItem(icon, var.name)
                self.attrConditionCombo.addItem(icon, var.name)
        self.attrCondition = str(self.attrConditionCombo.itemText(0))

        if self.attrXCombo.count() > 0:
            self.attrX = str(self.attrXCombo.itemText(0))
            # Use the second discrete variable for Y when there is one.
            y_index = 1 if self.attrYCombo.count() > 1 else 0
            self.attrY = str(self.attrYCombo.itemText(y_index))
        else:
            self.attrX = None
            self.attrY = None

    def resizeEvent(self, e):
        """Redraw the diagram so that it fits the new widget size."""
        OWWidget.resizeEvent(self, e)
        self.updateGraph()

    def showEvent(self, ev):
        """Redraw the diagram when the widget becomes visible."""
        OWWidget.showEvent(self, ev)
        self.updateGraph()

    @staticmethod
    def _row_total(row):
        """Return the sum of one contingency row (partial sum on TypeError)."""
        total = 0
        try:
            for val in row:
                total += val
        except TypeError:
            # Row is not iterable or holds a non-numeric entry; keep what was
            # accumulated so far, as the original best-effort code did.
            pass
        return total

    def updateGraph(self, *args):
        """Clear the canvas and redraw the whole sieve diagram.

        Called every time the graph has to be updated (new data, changed
        settings, resize, show).  Extra positional arguments are ignored.
        """
        for item in self.canvas.items():
            self.canvas.removeItem(item)    # remove all canvas items
        if not self.data:
            return
        if not self.attrX or not self.attrY:
            return

        data = self.getConditionalData()
        if not data or len(data) == 0:
            return

        # marginal distributions of the X and the Y attribute
        contX = get_contingency(data, self.attrX, self.attrX)
        contY = get_contingency(data, self.attrY, self.attrY)
        valsX = [self._row_total(row) for row in contX]
        valsY = [self._row_total(row) for row in contY]

        x_var = data.domain[self.attrX]
        y_var = data.domain[self.attrY]
        x_values = x_var.values
        y_values = y_var.values
        n_instances = len(data)     # hoisted: used for every cell below

        # joint counts, keyed by "<x value>-<y value>"
        contXY = self.getConditionalDistributions(data, [x_var, y_var])

        # per-cell record:
        # ((x value, x count), (y value, y count), joint count, N)
        probs = {}
        for i, valx in enumerate(valsX):
            for j, valy in enumerate(valsY):
                key = '%s-%s' % (x_values[i], y_values[j])
                # .get avoids inserting missing keys into the defaultdict
                actual = contXY.get(key, 0)
                probs[i, j] = ((x_values[i], valx), (y_values[j], valy),
                               actual, n_instances)

        # get text width of Y labels
        max_ylabel_w = 0
        for j in range(len(valsY)):
            xl = OWCanvasText(self.canvas, "", 0, 0,
                              htmlText=getHtmlCompatibleString(y_values[j]),
                              show=False)
            max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w)
        max_ylabel_w = min(max_ylabel_w, 200)   # upper limit for label widths

        # get text width of Y attribute name
        text = OWCanvasText(self.canvas, y_var.name, x=0, y=0, bold=1,
                            show=0, vertical=True)
        xOff = int(text.boundingRect().height() + max_ylabel_w)
        yOff = 55
        squareSize = min(self.canvasView.width() - xOff - 35,
                         self.canvasView.height() - yOff - 50)
        if squareSize < 0:
            return  # canvas is too small to draw rectangles
        self.canvasView.setSceneRect(0, 0, self.canvasView.width(),
                                     self.canvasView.height())

        # print graph name
        if self.attrCondition == "(None)":
            name = "<b>P(%s, %s) ≠ P(%s)×P(%s)</b>" % (
                self.attrX, self.attrY, self.attrX, self.attrY)
        else:
            cond_value = getHtmlCompatibleString(self.attrConditionValue)
            name = "<b>P(%s, %s | %s = %s) ≠ P(%s | %s = %s)×P(%s | %s = %s)</b>" % (
                self.attrX, self.attrY, self.attrCondition, cond_value,
                self.attrX, self.attrCondition, cond_value,
                self.attrY, self.attrCondition, cond_value)
        OWCanvasText(self.canvas, "", xOff + squareSize/2, 20, Qt.AlignCenter,
                     htmlText=name)
        OWCanvasText(self.canvas, "N = " + str(n_instances),
                     xOff + squareSize/2, 38, Qt.AlignCenter, bold=0)

        # compute chi-square over all cells with a non-zero expected count
        chisquare = 0.0
        for (_, xVal), (_, yVal), actual, total in probs.values():
            expected = float(xVal*yVal)/float(total)
            if expected == 0:
                continue
            chisquare += (actual - expected)*(actual - expected) / expected

        # draw rectangles
        currX = xOff
        max_xlabel_h = 0

        normX, normY = sum(valsX), sum(valsY)
        for i in range(len(valsX)):
            if valsX[i] == 0:
                continue
            currY = yOff
            width = int(float(squareSize * valsX[i])/float(normX))

            # iterate backwards: this way we sort y values correctly
            for j in range(len(valsY)-1, -1, -1):
                (xName, xVal), (yName, yVal), actual, total = probs[i, j]
                if valsY[j] == 0:
                    continue
                height = int(float(squareSize * valsY[j])/float(normY))

                # create rectangle
                rect = OWCanvasRectangle(self.canvas, currX+2, currY+2,
                                         width-4, height-4, z=-10)
                self.addRectIndependencePearson(
                    rect, currX+2, currY+2, width-4, height-4,
                    (xName, xVal), (yName, yVal), actual, total)

                # both marginals are non-zero here, so expected > 0
                expected = float(xVal*yVal)/float(total)
                pearson = (actual - expected) / sqrt(expected)
                tooltipText = (
                    "<b>X Attribute: %s</b><br>Value: <b>%s</b><br>"
                    "Number of instances (p(x)): <b>%d (%.2f%%)</b><hr>\n"
                    "<b>Y Attribute: %s</b><br>Value: <b>%s</b><br>"
                    "Number of instances (p(y)): <b>%d (%.2f%%)</b><hr>\n"
                    "<b>Number Of Instances (Probabilities):</b><br>"
                    "Expected (p(x)p(y)): <b>%.1f (%.2f%%)</b><br>"
                    "Actual (p(x,y)): <b>%d (%.2f%%)</b>\n"
                    "<hr><b>Statistics:</b><br>Chi-square: <b>%.2f</b><br>"
                    "Standardized Pearson residual: <b>%.2f</b>"
                ) % (self.attrX, getHtmlCompatibleString(xName), xVal,
                     100.0*float(xVal)/float(total),
                     self.attrY, getHtmlCompatibleString(yName), yVal,
                     100.0*float(yVal)/float(total),
                     expected, 100.0*float(xVal*yVal)/float(total*total),
                     actual, 100.0*float(actual)/float(total),
                     chisquare, pearson)
                rect.setToolTip(tooltipText)

                currY += height
                if currX == xOff:
                    # first column: label the row with the Y value
                    OWCanvasText(self.canvas, "", xOff, currY - height/2,
                                 Qt.AlignRight | Qt.AlignVCenter,
                                 htmlText=getHtmlCompatibleString(y_values[j]))

            xl = OWCanvasText(self.canvas, "", currX + width/2,
                              yOff + squareSize, Qt.AlignHCenter | Qt.AlignTop,
                              htmlText=getHtmlCompatibleString(x_values[i]))
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)

            currX += width

        # show attribute names
        OWCanvasText(self.canvas, self.attrY, 0, yOff + squareSize/2,
                     Qt.AlignLeft | Qt.AlignVCenter, bold=1, vertical=True)
        OWCanvasText(self.canvas, self.attrX, xOff + squareSize/2,
                     yOff + squareSize + max_xlabel_h,
                     Qt.AlignHCenter | Qt.AlignTop, bold=1)

    def getConditionalDistributions(self, data, attrs):
        """Count instances for combinations of attribute values.

        The attribute list is ``attrs`` followed by the class variable (when
        the domain has one).  For every non-empty prefix of that list, every
        combination of values is counted.

        :param data: table (``SqlTable`` or in-memory) to count over.
        :param attrs: variables (or anything accepted by ``data.domain[...]``).
        :return: ``defaultdict(int)`` mapping ``"v1-v2-..."`` to a count.
        """
        cond_dist = defaultdict(int)
        all_attrs = [data.domain[a] for a in attrs]
        if data.domain.class_var is not None:
            all_attrs.append(data.domain.class_var)

        for i in range(1, len(all_attrs) + 1):
            attr = all_attrs[:i]
            if isinstance(data, SqlTable):
                # let the database do the grouping and counting
                attr = [a.to_sql() for a in attr]
                fields = attr + ["COUNT(*)"]
                query = data._sql_query(fields, group_by=attr)
                with data._execute_sql_query(query) as cur:
                    res = cur.fetchall()
                for r in res:
                    str_values = [a.repr_val(a.to_val(x))
                                  for a, x in zip(all_attrs, r[:-1])]
                    str_values = [x if x != '?' else 'None' for x in str_values]
                    cond_dist['-'.join(str_values)] = r[-1]
            else:
                # filter the table once per combination of values
                for indices in product(*(range(len(a.values)) for a in attr)):
                    vals = []
                    conditions = []
                    for var, ind in zip(attr, indices):
                        value = var.values[ind]
                        vals.append(value)
                        conditions.append(Orange.data.filter.FilterDiscrete(
                            column=var, values=[value]))
                    filt = Orange.data.filter.Values(conditions)
                    filtdata = filt(data)
                    cond_dist['-'.join(vals)] = len(filtdata)
        return cond_dist

    def addRectIndependencePearson(self, rect, x, y, w, h, xAttr_xVal, yAttr_yVal, actual, sum):
        """Style ``rect`` by its standardized Pearson residual.

        Positive residuals (more instances than expected) get a blue pen and
        a blue-tinted fill, negative ones a red pen and a red-tinted fill, a
        zero residual a white fill.  Optionally draws one dot per instance and
        finally the grid lines, whose spacing depends on the residual.

        :param rect: canvas rectangle to style.
        :param x, y, w, h: geometry of the rectangle.
        :param xAttr_xVal: ``(x value name, x marginal count)``.
        :param yAttr_yVal: ``(y value name, y marginal count)``.
        :param actual: observed joint count.
        :param sum: total number of instances.
        """
        _, xVal = xAttr_xVal
        _, yVal = yAttr_yVal
        expected = float(xVal*yVal)/float(sum)
        # An empty marginal gives expected == 0; treat it as "no deviation"
        # instead of dividing by zero.
        pearson = (actual - expected) / sqrt(expected) if expected else 0.0

        if pearson > 0:
            # more examples than we would expect under the null hypothesis
            intPearson = int(floor(pearson))
            pen = QPen(QColor(0, 0, 255), 1)
            rect.setPen(pen)
            b = 255
            r = g = max(255 - intPearson*20, 55)
        elif pearson < 0:
            intPearson = int(ceil(pearson))
            pen = QPen(QColor(255, 0, 0), 1)
            rect.setPen(pen)
            r = 255
            b = g = max(255 + intPearson*20, 55)
        else:
            pen = QPen(QColor(255, 255, 255), 1)
            r = g = b = 255     # white
        rect.setBrush(QBrush(QColor(r, g, b)))

        if self.showCases and w > 6 and h > 6:
            if not self.showInColor:
                c = Qt.black
            elif pearson > 0:
                c = QColor(0, 0, 255)
            else:
                c = QColor(255, 0, 0)
            for _ in range(int(actual)):
                OWCanvasEllipse(self.canvas,
                                random.randint(x+1, x + w-4),
                                random.randint(y+1, y + h-4),
                                3, 3, penColor=c, brushColor=c, z=100)

        if pearson > 0:
            pearson = min(pearson, 10)
            kvoc = 1 - 0.08 * pearson   # pearson in [0..10] --> kvoc in [1..0.2]
        else:
            pearson = max(pearson, -10)
            kvoc = 1 - 0.4*pearson      # pearson in [-10..0] --> kvoc in [5..1]

        self.addLines(x, y, w, h, kvoc, pen)

    def addLines(self, x, y, w, h, diff, pen):
        """Draw the vertical and horizontal grid lines inside a rectangle.

        :param x, y, w, h: geometry of the rectangle.
        :param diff: factor applied to the base line distance of 20 pixels.
        :param pen: pen whose colour is used for the lines.
        """
        if not self.showLines:
            return
        if w == 0 or h == 0:
            return

        dist = 20 * diff    # 20 = original distance between two lines in pixels
        if dist <= 0:
            return          # a non-positive step would never terminate below

        line_color = pen.color()
        temp = dist
        while temp < w:
            OWCanvasLine(self.canvas, temp+x, y, temp+x, y+h, 1, line_color)
            temp += dist

        temp = dist
        while temp < h:
            OWCanvasLine(self.canvas, x, y+temp, x+w, y+temp, 1, line_color)
            temp += dist

    def saveToFileCanvas(self):
        """Legacy entry point for saving the canvas; delegates to save_graph.

        The previous body used ``OWChooseImageSizeDlg``, which is not imported
        by this module and therefore always raised ``NameError``.
        """
        self.save_graph()

    def closeEvent(self, ce):
        """Forward the close event to the base widget.

        Uses ``OWWidget`` (like ``resizeEvent``/``showEvent``) rather than
        ``QDialog`` so that any close handling in ``OWWidget`` is not skipped.
        """
        OWWidget.closeEvent(self, ce)

    def save_graph(self):
        """Open the save dialog for the canvas with the image writers."""
        from Orange.widgets.data.owsave import OWSave

        save_img = OWSave(parent=self, data=self.canvas,
                          file_formats=FileFormats.img_writers)
        save_img.exec_()
464
|
|
|
|
465
|
|
|
# class OWSieveOptimization(OWMosaicOptimization, orngMosaic): |
466
|
|
|
# settingsList = ["percentDataUsed", "ignoreTooSmallCells", |
467
|
|
|
# "timeLimit", "useTimeLimit", "lastSaveDirName", "projectionLimit", "useProjectionLimit"] |
468
|
|
|
# |
469
|
|
|
# def __init__(self, visualizationWidget = None, signalManager = None): |
470
|
|
|
# OWWidget.__init__(self, None, signalManager, "Sieve Evaluation Dialog", savePosition = True, wantMainArea = 0, wantStatusBar = 1) |
|
|
|
|
471
|
|
|
# orngMosaic.__init__(self) |
472
|
|
|
# |
473
|
|
|
# self.resize(390,620) |
474
|
|
|
# self.setCaption("Sieve Diagram Evaluation Dialog") |
475
|
|
|
# |
476
|
|
|
# loaded variables |
477
|
|
|
# self.visualizationWidget = visualizationWidget |
478
|
|
|
# self.useTimeLimit = 0 |
479
|
|
|
# self.useProjectionLimit = 0 |
480
|
|
|
# self.qualityMeasure = CHI_SQUARE # we will always compute only chi square with sieve diagram |
481
|
|
|
# self.optimizationType = EXACT_NUMBER_OF_ATTRS |
482
|
|
|
# self.attributeCount = 2 |
483
|
|
|
# self.attrCondition = None |
484
|
|
|
# self.attrConditionValue = None |
485
|
|
|
# |
486
|
|
|
# self.lastSaveDirName = os.getcwd() |
487
|
|
|
# |
488
|
|
|
# self.attrLenDict = {} |
489
|
|
|
# self.shownResults = [] |
490
|
|
|
# self.loadSettings() |
491
|
|
|
# |
492
|
|
|
# self.layout().setMargin(0) |
493
|
|
|
# self.tabs = gui.tabWidget(self.controlArea) |
494
|
|
|
# self.MainTab = gui.createTabPage(self.tabs, "Main") |
495
|
|
|
# self.SettingsTab = gui.createTabPage(self.tabs, "Settings") |
496
|
|
|
# self.ManageTab = gui.createTabPage(self.tabs, "Manage") |
497
|
|
|
# |
498
|
|
|
########################### |
499
|
|
|
# MAIN TAB |
500
|
|
|
# box = gui.widgetBox(self.MainTab, box = "Condition") |
501
|
|
|
# self.attrConditionCombo = gui.comboBoxWithCaption(box, self, "attrCondition", "Attribute:", callback = self.updateConditionAttr, sendSelectedValue = 1, valueType = str, labelWidth = 70) |
|
|
|
|
502
|
|
|
# self.attrConditionValueCombo = gui.comboBoxWithCaption(box, self, "attrConditionValue", "Value:", sendSelectedValue = 1, valueType = str, labelWidth = 70) |
|
|
|
|
503
|
|
|
# |
504
|
|
|
# self.optimizationBox = gui.widgetBox(self.MainTab, "Evaluate") |
505
|
|
|
# self.buttonBox = gui.widgetBox(self.optimizationBox, orientation = "horizontal") |
506
|
|
|
# self.resultsBox = gui.widgetBox(self.MainTab, "Projection List Ordered by Chi-Square") |
507
|
|
|
# |
508
|
|
|
# self.label1 = gui.widgetLabel(self.buttonBox, 'Projections with ') |
509
|
|
|
# self.optimizationTypeCombo = gui.comboBox(self.buttonBox, self, "optimizationType", items = [" exactly ", " maximum "] ) |
|
|
|
|
510
|
|
|
# self.attributeCountCombo = gui.comboBox(self.buttonBox, self, "attributeCount", items = range(1, 5), tooltip = "Evaluate only projections with exactly (or maximum) this number of attributes", sendSelectedValue = 1, valueType = int) |
|
|
|
|
511
|
|
|
# self.attributeLabel = gui.widgetLabel(self.buttonBox, ' attributes') |
512
|
|
|
# |
513
|
|
|
# self.startOptimizationButton = gui.button(self.optimizationBox, self, "Start Evaluating Projections", callback = self.evaluateProjections) |
|
|
|
|
514
|
|
|
# f = self.startOptimizationButton.font(); f.setBold(1); self.startOptimizationButton.setFont(f) |
515
|
|
|
# self.stopOptimizationButton = gui.button(self.optimizationBox, self, "Stop Evaluation", callback = self.stopEvaluationClick) |
|
|
|
|
516
|
|
|
# self.stopOptimizationButton.setFont(f) |
517
|
|
|
# self.stopOptimizationButton.hide() |
518
|
|
|
# |
519
|
|
|
# self.resultList = gui.listBox(self.resultsBox, self, callback = self.showSelectedAttributes) |
520
|
|
|
# self.resultList.setMinimumHeight(200) |
521
|
|
|
# |
522
|
|
|
########################## |
523
|
|
|
# SETTINGS TAB |
524
|
|
|
# gui.checkBox(self.SettingsTab, self, "ignoreTooSmallCells", "Ignore cells where expected number of cases is less than 5", box = "Ignore small cells", tooltip = "Statisticians advise that in cases when the number of expected examples is less than 5 we ignore the cell \nsince it can significantly influence the chi-square value.") |
|
|
|
|
525
|
|
|
# |
526
|
|
|
# gui.comboBoxWithCaption(self.SettingsTab, self, "percentDataUsed", "Percent of data used: ", box = "Data settings", items = self.percentDataNums, sendSelectedValue = 1, valueType = int, tooltip = "In case that we have a large dataset the evaluation of each projection can take a lot of time.\nWe can therefore use only a subset of randomly selected examples, evaluate projection on them and thus make evaluation faster.") |
|
|
|
|
527
|
|
|
# |
528
|
|
|
# self.stopOptimizationBox = gui.widgetBox(self.SettingsTab, "When to Stop Evaluation or Optimization?") |
529
|
|
|
# gui.checkWithSpin(self.stopOptimizationBox, self, "Time limit: ", 1, 1000, "useTimeLimit", "timeLimit", " (minutes)", debuggingEnabled = 0) # disable debugging. we always set this to 1 minute |
|
|
|
|
530
|
|
|
# gui.checkWithSpin(self.stopOptimizationBox, self, "Use projection count limit: ", 1, 1000000, "useProjectionLimit", "projectionLimit", " (projections)", debuggingEnabled = 0) |
|
|
|
|
531
|
|
|
# gui.rubber(self.SettingsTab) |
532
|
|
|
# |
533
|
|
|
########################## |
534
|
|
|
# SAVE TAB |
535
|
|
|
# self.visualizedAttributesBox = gui.widgetBox(self.ManageTab, "Number of Concurrently Visualized Attributes") |
536
|
|
|
# self.dialogsBox = gui.widgetBox(self.ManageTab, "Dialogs") |
537
|
|
|
# self.manageResultsBox = gui.widgetBox(self.ManageTab, "Manage projections") |
538
|
|
|
# |
539
|
|
|
# self.attrLenList = gui.listBox(self.visualizedAttributesBox, self, selectionMode = QListWidget.MultiSelection, callback = self.attrLenListChanged) |
|
|
|
|
540
|
|
|
# self.attrLenList.setMinimumHeight(60) |
541
|
|
|
# |
542
|
|
|
# self.buttonBox7 = gui.widgetBox(self.dialogsBox, orientation = "horizontal") |
543
|
|
|
# gui.button(self.buttonBox7, self, "Attribute Ranking", self.attributeAnalysis, debuggingEnabled = 0) |
544
|
|
|
# gui.button(self.buttonBox7, self, "Graph Projection Scores", self.graphProjectionQuality, debuggingEnabled = 0) |
|
|
|
|
545
|
|
|
# |
546
|
|
|
# hbox = gui.widgetBox(self.manageResultsBox, orientation = "horizontal") |
547
|
|
|
# gui.button(hbox, self, "Load", self.load, debuggingEnabled = 0) |
548
|
|
|
# gui.button(hbox, self, "Save", self.save, debuggingEnabled = 0) |
549
|
|
|
# |
550
|
|
|
# hbox = gui.widgetBox(self.manageResultsBox, orientation = "horizontal") |
551
|
|
|
# gui.button(hbox, self, "Clear results", self.clearResults) |
552
|
|
|
# gui.rubber(self.ManageTab) |
553
|
|
|
# |
554
|
|
|
# reset some parameters if we are debugging so that it won't take too much time |
555
|
|
|
# if orngDebugging.orngDebuggingEnabled: |
556
|
|
|
# self.useTimeLimit = 1 |
557
|
|
|
# self.timeLimit = 0.3 |
558
|
|
|
# self.useProjectionLimit = 1 |
559
|
|
|
# self.projectionLimit = 100 |
560
|
|
|
# self.icons = self.createAttributeIconDict() |
561
|
|
|
# |
562
|
|
|
# |
563
|
|
|
# when we start evaluating projections save info on the condition - this has to be stored in the |
564
|
|
|
# def evaluateProjections(self): |
565
|
|
|
# if not self.data: return |
566
|
|
|
# self.usedAttrCondition = self.attrCondition |
567
|
|
|
# self.usedAttrConditionValue = self.attrConditionValue |
568
|
|
|
# self.wholeDataSet = self.data # we have to create a datasubset based on the attrCondition |
569
|
|
|
# if self.attrCondition != "(None)": |
570
|
|
|
# self.data = self.data.select({self.attrCondition : self.attrConditionValue}) |
571
|
|
|
# orngMosaic.setData(self, self.data) |
572
|
|
|
# OWMosaicOptimization.evaluateProjections(self) |
573
|
|
|
# |
574
|
|
|
# this is a handler that is called after we finish evaluating projections (when evaluated all projections, or stop was pressed) |
|
|
|
|
575
|
|
|
# def finishEvaluation(self, evaluatedProjections): |
576
|
|
|
# self.data = self.wholeDataSet # restore the whole data after projection evaluation |
577
|
|
|
# OWMosaicOptimization.finishEvaluation(self, evaluatedProjections) |
578
|
|
|
# |
579
|
|
|
# |
580
|
|
|
# def showSelectedAttributes(self, attrs = None): |
581
|
|
|
# if not self.visualizationWidget: return |
582
|
|
|
# if not attrs: |
583
|
|
|
# projection = self.getSelectedProjection() |
584
|
|
|
# if not projection: return |
585
|
|
|
# self.visualizationWidget.attrCondition = self.usedAttrCondition |
586
|
|
|
# self.visualizationWidget.updateConditionAttr() |
587
|
|
|
# self.visualizationWidget.attrConditionValue = self.usedAttrConditionValue |
588
|
|
|
# (score, attrs, index, extraInfo) = projection |
589
|
|
|
# |
590
|
|
|
# self.resultList.setFocus() |
591
|
|
|
# self.visualizationWidget.setShownAttributes(attrs) |
592
|
|
|
# |
593
|
|
|
# |
594
|
|
|
# def clearResults(self): |
595
|
|
|
# orngMosaic.clearResults(self) |
596
|
|
|
# self.resultList.clear() |
597
|
|
|
# |
598
|
|
|
# def setData(self, data, removeUnusedValues = 0): |
599
|
|
|
# self.attrConditionCombo.clear() |
600
|
|
|
# self.attrConditionCombo.addItem("(None)") |
601
|
|
|
# self.attrConditionValueCombo.clear() |
602
|
|
|
# self.resultList.clear() |
603
|
|
|
# |
604
|
|
|
# orngMosaic.setData(self, data, removeUnusedValues) |
605
|
|
|
# |
606
|
|
|
# self.setStatusBarText("") |
607
|
|
|
# if not self.data: return None |
608
|
|
|
# |
609
|
|
|
# for i in range(len(self.data.domain)): |
610
|
|
|
# self.attrConditionCombo.addItem(self.icons[self.data.domain[i].varType], self.data.domain[i].name) |
611
|
|
|
# self.attrCondition = str(self.attrConditionCombo.itemText(0)) |
612
|
|
|
# |
613
|
|
|
# return self.data |
614
|
|
|
# |
615
|
|
|
# def finishedAddingResults(self): |
616
|
|
|
# self.resultList.setCurrentItem(self.resultList.item(0)) |
617
|
|
|
# |
618
|
|
|
# def updateConditionAttr(self): |
619
|
|
|
# self.attrConditionValueCombo.clear() |
620
|
|
|
# |
621
|
|
|
# if self.attrCondition != "(None)": |
622
|
|
|
# for val in self.data.domain[self.attrCondition].values: |
623
|
|
|
# self.attrConditionValueCombo.addItem(val) |
624
|
|
|
# self.attrConditionValue = str(self.attrConditionValueCombo.itemText(0)) |
625
|
|
|
|
626
|
|
|
|
627
|
|
|
# test widget appearance |
628
|
|
|
if __name__ == "__main__":
    # Manual smoke test: show the widget, feed it the zoo data set, run the
    # Qt event loop and store the widget settings once the loop exits.
    app = QApplication(sys.argv)
    widget = OWSieveDiagram()
    widget.show()
    widget.setData(Table(r"zoo.tab"))
    app.exec_()
    widget.saveSettings()
636
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder.