1
|
|
|
from Orange.data import Variable, Storage |
2
|
|
|
|
3
|
|
|
def _get_variable(variable, dat):
    """
    Resolve `variable` to a variable descriptor in the context of `dat`.

    If `variable` is already a `Variable` instance it is returned as is,
    provided that `dat` either has no `variable` attribute (or it is None)
    or that attribute is the very same object. Otherwise `variable` is
    treated as a key: it is looked up in `dat.domain` when `dat` has a
    domain; failing that, `dat.variable` is returned when `dat` has one.

    :param variable: a `Variable` instance, or a key accepted by
        `dat.domain[...]`
    :param dat: object that may expose a `domain` and/or a `variable`
        attribute
    :return: the resolved variable
    :raises ValueError: if `variable` is a `Variable` different from
        `dat.variable`, or if `dat` has neither `domain` nor `variable`
    """
    if not isinstance(variable, Variable):
        # Not a descriptor yet: let the data object tell us what it is.
        if hasattr(dat, "domain"):
            return dat.domain[variable]
        if hasattr(dat, "variable"):
            return dat.variable
        raise ValueError("invalid specification of variable")

    # Already a descriptor: it must be the same object (identity, not
    # equality) as the one the data is bound to, if the data is bound to any.
    bound = getattr(dat, "variable", None)
    if bound is not None and bound is not variable:
        raise ValueError("variable does not match the variable "
                         "in the data")
    return variable
16
|
|
|
|
17
|
|
|
|
18
|
|
|
class BasicStats:
    """
    Holder of six summary values for a single variable, stored in the
    attributes `min`, `max`, `mean`, `var`, `nans` and `non_nans`.
    """

    def __init__(self, dat=None, variable=None):
        """
        Initialize the statistics from one of three kinds of input.

        :param dat: a `Storage` instance (statistics are computed from it
            via `from_data`), None (attributes get neutral starting values:
            min = +inf, max = -inf, everything else 0), or any iterable of
            exactly six items in the order
            (min, max, mean, var, nans, non_nans)
        :param variable: variable specification; used only when `dat` is
            a `Storage`
        """
        if isinstance(dat, Storage):
            self.from_data(dat, variable)
            return

        if dat is None:
            # Neutral element for min/max accumulation, zero for the rest.
            dat = (float("inf"), float("-inf"), 0, 0, 0, 0)

        (self.min, self.max, self.mean,
         self.var, self.nans, self.non_nans) = dat

    def from_data(self, data, variable):
        """
        Fill the attributes with statistics that `data` computes for
        `variable`.

        :param data: object providing `_compute_basic_stats`
        :param variable: variable specification, resolved with
            `_get_variable`
        """
        resolved = _get_variable(variable, data)
        # A single variable is requested, so only the first entry of the
        # result is used.
        first = data._compute_basic_stats([resolved])[0]
        (self.min, self.max, self.mean,
         self.var, self.nans, self.non_nans) = first
35
|
|
|
|
36
|
|
|
class DomainBasicStats:
    """
    Collection of `BasicStats` objects computed for the variables of a data
    object, addressable by variable, variable name or integer index.
    """

    def __init__(self, data, include_metas=False):
        """
        Compute statistics for `data` and remember its domain.

        :param data: object with a `domain` attribute and a
            `_compute_basic_stats` method
        :param include_metas: passed through to `_compute_basic_stats`
        """
        self.domain = data.domain
        computed = data._compute_basic_stats(include_metas=include_metas)
        self.stats = [BasicStats(entry) for entry in computed]

    def __getitem__(self, index):
        """
        Return the statistics stored for `index`.

        `index` may be an integer or anything that `self.domain.index` can
        translate into one (a variable or a variable name). Meta attributes
        are addressed by negative indices, which are mapped to positions
        that follow the `len(domain)` regular entries: -1 becomes
        `len(domain)`, -2 becomes `len(domain) + 1`, and so on. The same
        positions can also be given directly as non-negative integers.
        """
        if isinstance(index, int):
            position = index
        else:
            position = self.domain.index(index)
        if position < 0:
            position = len(self.domain) - 1 - position
        return self.stats[position]
52
|
|
|
|
53
|
|
|
|