|
1
|
|
|
from Orange.data import Variable, Storage |
|
2
|
|
|
|
|
3
|
|
|
def _get_variable(variable, dat):
    """Resolve ``variable`` against ``dat`` and return the result.

    Resolution rules, tried in order:

    * ``variable`` is already a ``Variable`` instance: it is returned as is,
      unless ``dat`` carries a ``variable`` attribute that is set and is a
      different object, in which case ``ValueError`` is raised.
    * ``dat`` has a ``domain`` attribute: ``dat.domain[variable]`` is
      returned.
    * ``dat`` has a ``variable`` attribute: ``dat.variable`` is returned.
    * otherwise ``ValueError`` is raised.
    """
    if isinstance(variable, Variable):
        attached = getattr(dat, "variable", None)
        # Identity (not equality) decides whether the two variables agree.
        if not (attached is None or attached is variable):
            raise ValueError("variable does not match the variable "
                             "in the data")
        return variable

    if hasattr(dat, "domain"):
        return dat.domain[variable]

    if hasattr(dat, "variable"):
        return dat.variable

    raise ValueError("invalid specification of variable")
|
16
|
|
|
|
|
17
|
|
|
|
|
18
|
|
|
class BasicStats:
    """Container for the basic statistics of a single variable.

    Instances expose six attributes: ``min``, ``max``, ``mean``, ``var``,
    ``nans`` and ``non_nans``.
    NOTE(review): ``nans``/``non_nans`` presumably count missing and defined
    values - confirm against ``_compute_basic_stats``.
    """

    def __init__(self, dat=None, variable=None):
        """Initialise the statistics from one of three kinds of input.

        * ``dat`` is a ``Storage``: the values are computed from it for
          ``variable`` (see :meth:`from_data`).
        * ``dat`` is ``None``: neutral defaults are used (``min`` is +inf,
          ``max`` is -inf, everything else is 0).
        * anything else: ``dat`` is unpacked as exactly six values in the
          order ``min, max, mean, var, nans, non_nans``.
        """
        if isinstance(dat, Storage):
            self.from_data(dat, variable)
            return

        if dat is None:
            self.min = float("inf")
            self.max = float("-inf")
            self.mean = 0
            self.var = 0
            self.nans = 0
            self.non_nans = 0
            return

        (self.min, self.max, self.mean,
         self.var, self.nans, self.non_nans) = dat

    def from_data(self, data, variable):
        """Fill the six attributes with statistics computed by ``data``.

        ``variable`` is first resolved with ``_get_variable``; ``data`` is
        then asked for the statistics of that single variable and the first
        (only requested) result is unpacked into the attributes.
        """
        resolved = _get_variable(variable, data)
        computed = data._compute_basic_stats([resolved])
        (self.min, self.max, self.mean,
         self.var, self.nans, self.non_nans) = computed[0]
|
35
|
|
|
|
|
36
|
|
|
class DomainBasicStats:
    """Basic statistics for the variables of a data set's domain.

    ``domain`` keeps the domain of the data; ``stats`` is a list of
    ``BasicStats`` objects, one per item returned by the data's
    ``_compute_basic_stats``.
    """

    def __init__(self, data, include_metas=False):
        """Compute the statistics for ``data``.

        ``include_metas`` is forwarded unchanged to
        ``data._compute_basic_stats``.
        """
        self.domain = data.domain
        raw_stats = data._compute_basic_stats(include_metas=include_metas)
        self.stats = [BasicStats(entry) for entry in raw_stats]

    def __getitem__(self, index):
        """
        Return the statistics for ``index``, which can be a variable,
        a variable name or an integer. Anything that is not an ``int`` is
        translated through ``self.domain.index``. Meta attributes can be
        given by negative indices (``-1`` maps to position ``len(domain)``,
        ``-2`` to the one after it, ...) or by indices above ``len(domain)``.
        """
        position = (index if isinstance(index, int)
                    else self.domain.index(index))
        if position < 0:
            # Negative positions are stored after the regular variables.
            position = len(self.domain) - 1 - position
        return self.stats[position]
|
52
|
|
|
|
|
53
|
|
|
|