Completed
Push — master ( f89968...a9e9bf )
by Xianshun
01:02
created

Sample.add_category()   A

Complexity

Conditions 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 5
rs 9.4285
c 0
b 0
f 0
1
import math
2
3
4
class Observation(object):
    """One data point of a sample.

    An observation can carry a numeric value ``x`` (optionally paired with
    ``y``), a categorical ``label``, and a ``group_id`` naming the group it
    belongs to.  Every field defaults to ``None``, meaning "not set".
    """

    # Class-level defaults: a field that is never assigned reads as None.
    x = None
    y = None
    group_id = None
    label = None

    def __init__(self, x=None, label=None, group_id=None, y=None):
        """Create an observation, storing only the fields that were given.

        :param x: numeric value, or None.
        :param label: categorical value, or None.
        :param group_id: identifier of the group this observation is in, or None.
        :param y: second numeric value paired with ``x``, or None.
        """
        # Arguments left as None are not assigned, so the class-level
        # default stays visible for that field.
        if x is not None:
            self.x = x
        if y is not None:
            self.y = y

        if label is not None:
            self.label = label

        if group_id is not None:
            self.group_id = group_id

    def __repr__(self):
        """Return a debugging representation listing all four fields."""
        return 'Observation(x={!r}, y={!r}, label={!r}, group_id={!r})'.format(
            self.x, self.y, self.label, self.group_id)

    def is_categorical(self):
        """Return True when a ``label`` has been set."""
        return self.label is not None

    def is_numerical(self):
        """Return True when a numeric ``x`` has been set."""
        return self.x is not None
27
28
29
class Sample(object):
    """An ordered collection of observations.

    Observations are kept in insertion order in ``self.observations``.
    Whether the sample counts as numerical or categorical is decided by its
    first observation.
    """

    observations = None

    def __init__(self):
        """Create an empty sample."""
        self.observations = []

    def add(self, observation):
        """Append an already-built observation to the sample."""
        self.observations.append(observation)

    def add_numeric(self, numeric, group_id=None):
        """Append a numeric observation whose ``x`` is ``numeric``."""
        self.add(Observation(x=numeric, group_id=group_id))

    def add_category(self, category, group_id=None):
        """Append a categorical observation whose ``label`` is ``category``."""
        self.add(Observation(label=category, group_id=group_id))

    def add_xy(self, x, y, group_id=None):
        """Append a paired numeric observation ``(x, y)``."""
        self.add(Observation(x=x, y=y, group_id=group_id))

    def size(self):
        """Return the number of observations in the sample."""
        return len(self.observations)

    def get(self, index):
        """Return the observation stored at position ``index``."""
        return self.observations[index]

    def is_categorical(self):
        """Return True when the first observation is categorical.

        An empty sample has no first observation and is reported as not
        categorical instead of raising ``IndexError``.
        """
        if not self.observations:
            return False
        return self.observations[0].is_categorical()

    def is_numerical(self):
        """Return True when the first observation is numerical.

        An empty sample has no first observation and is reported as not
        numerical instead of raising ``IndexError``.
        """
        if not self.observations:
            return False
        return self.observations[0].is_numerical()

    def count_by_group_id(self, group_id):
        """Count the observations belonging to ``group_id``.

        Passing ``None`` counts every observation regardless of its group.
        """
        return sum(1 for x in self.observations if group_id is None or x.group_id == group_id)
71
72
73
class SampleDistribution(object):
    """Summary statistics for a sample, or for one group inside a sample.

    The statistics can be supplied directly (``mean``, ``sd``,
    ``sample_size``, ``proportion``) or derived from ``sample``.  When a
    sample is given, the values derived from it overwrite any that were
    supplied directly.  Fields that are unknown stay ``None``.
    """

    sample = None
    group_id = None

    categorical_value = None
    is_categorical = False
    is_numerical = False

    sd = None
    sample_size = None
    mean = None
    variance = None
    sum_of_squares = None

    proportion = None

    def __init__(self, sample=None, group_id=None, categorical_value=None, mean=None, sd=None, sample_size=None,
                 proportion=None):
        """Build the distribution from given statistics and/or a sample.

        :param sample: object exposing ``size()``, ``get(index)``,
            ``is_numerical()``, ``is_categorical()`` and
            ``count_by_group_id(group_id)``; or None.
        :param group_id: restrict the statistics to this group; None uses
            every observation.
        :param categorical_value: label whose proportion is measured when the
            sample is categorical.
        :param mean: known mean; marks the distribution as numerical.
        :param sd: known standard deviation.
        :param sample_size: known number of observations.
        :param proportion: known proportion; marks the distribution as
            categorical.
        :raises ZeroDivisionError: if ``sample`` is numerical and no
            observation belongs to ``group_id``.
        """
        if group_id is not None:
            self.group_id = group_id
        if categorical_value is not None:
            self.categorical_value = categorical_value

        if mean is not None:
            self.mean = mean
            self.is_numerical = True

        if proportion is not None:
            self.proportion = proportion
            self.is_categorical = True

        if sample_size is not None:
            self.sample_size = sample_size

        if sd is not None:
            self.sd = sd

        if self.sd is not None and self.sample_size is not None:
            self.variance = self.sd * self.sd
            self.sum_of_squares = self.variance * (self.sample_size - 1)

        if sample is not None:
            self.sample = sample
            if sample.is_numerical():
                self.mean = SampleDistribution.calculate_mean(sample, group_id)
                self.sum_of_squares = SampleDistribution.calculate_sum_of_squares(sample, self.mean, group_id)
                self.sample_size = sample.count_by_group_id(group_id)
                if self.sample_size > 1:
                    self.variance = self.sum_of_squares / (self.sample_size - 1)
                    self.sd = math.sqrt(self.variance)
                else:
                    # With a single observation the n - 1 divisor is zero, so
                    # the spread is left unknown instead of dividing by zero.
                    self.variance = None
                    self.sd = None
                self.is_numerical = True
            elif sample.is_categorical() and categorical_value is not None:
                self.proportion = SampleDistribution.calculate_proportion(sample, categorical_value, group_id)
                self.sample_size = sample.count_by_group_id(group_id)
                self.mean = self.proportion * self.sample_size
                self.variance = self.proportion * (1.0 - self.proportion) * self.sample_size
                self.is_categorical = True

    @staticmethod
    def _observations_in_group(sample, group_id):
        """Yield the observations of ``sample`` that belong to ``group_id``.

        A ``group_id`` of None selects every observation.
        """
        for i in range(sample.size()):
            observation = sample.get(i)
            if group_id is None or observation.group_id == group_id:
                yield observation

    @staticmethod
    def calculate_mean(sample, group_id):
        """Return the arithmetic mean of ``x`` over the selected observations.

        :raises ZeroDivisionError: if no observation belongs to ``group_id``.
        """
        count = 0
        the_sum = 0
        for observation in SampleDistribution._observations_in_group(sample, group_id):
            the_sum += observation.x
            count += 1
        return the_sum / count

    @staticmethod
    def calculate_sum_of_squares(sample, mean, group_id):
        """Return the sum of squared deviations of ``x`` from ``mean``."""
        the_sum = 0
        for observation in SampleDistribution._observations_in_group(sample, group_id):
            deviation = observation.x - mean
            the_sum += deviation * deviation
        return the_sum

    @staticmethod
    def calculate_proportion(sample, categorical_value, group_id):
        """Return the fraction of selected observations labelled ``categorical_value``.

        Returns 0 when no observation belongs to ``group_id``.
        """
        matching = 0
        total = 0
        for observation in SampleDistribution._observations_in_group(sample, group_id):
            total += 1
            if observation.label == categorical_value:
                matching += 1
        if total == 0:
            return 0
        # float() keeps this a true division even on interpreters where `/`
        # between two ints truncates (Python 2).
        return float(matching) / total
165
166